├── .gitignore ├── LICENSE.md ├── README.md ├── lparser.py ├── lundump.py └── main.py /.gitignore: -------------------------------------------------------------------------------- 1 | example.* 2 | __pycache__ 3 | NOTES.md 4 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020-2021 LuaDecompy Contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LuaDecompy 2 | 3 | An experimental Lua 5.1 dump decompiler (typically dumped using `luac -o `). 4 | 5 | You will quickly find that only **extremely** simple scripts are decompiled successfully right now. This is an experimental project and not all opcodes are properly handled for now. If you need a real decompiler I would recommend any of the handful of ones that exist already. 6 | 7 | ## Why? 8 | 9 | Lua has a relatively small instruction set (only 38 different opcodes!). This makes it pretty feasible for a weekend decompiler project. (real) Decompilers are extremely complex pieces of software, so being able to write a simpler one helps show the theory without *much* of the headache. 10 | 11 | ## Example usage 12 | 13 | ```sh 14 | > cat example.lua && luac5.1 -o example.luac example.lua 15 | local printMsg = function(append) 16 | local tbl = {"He", "llo", " ", "Wo"} 17 | local str = "" 18 | 19 | for i = 1, #tbl do 20 | str = str .. tbl[i] 21 | end 22 | 23 | print(str .. append) 24 | end 25 | 26 | printMsg("rld!") 27 | > python main.py example.luac 28 | example.luac 29 | 30 | ==== [[example.lua's constants]] ==== 31 | 32 | 0: [STRING] rld! 33 | 34 | ==== [[example.lua's locals]] ==== 35 | 36 | R[0]: printMsg 37 | 38 | ==== [[example.lua's dissassembly]] ==== 39 | 40 | [ 0] CLOSURE : R[0] 0 ; 41 | [ 1] MOVE : 1 0 0 ; move R[0] into R[1] 42 | [ 2] LOADK : R[2] K[0] ; load "rld!" into R[2] 43 | [ 3] CALL : 1 2 1 ; 44 | [ 4] RETURN : 0 1 0 ; 45 | 46 | ==== [[example.lua's protos]] ==== 47 | 48 | 49 | ==== [['s constants]] ==== 50 | 51 | 0: [STRING] He 52 | 1: [STRING] llo 53 | 2: [STRING] 54 | 3: [STRING] Wo 55 | 4: [STRING] 56 | 5: [NUMBER] 1.0 57 | 6: [STRING] print 58 | 59 | ==== [['s locals]] ==== 60 | 61 | R[0]: append 62 | R[1]: tbl 63 | R[2]: str 64 | R[3]: (for index) 65 | R[4]: (for limit) 66 | R[5]: (for step) 67 | R[6]: i 68 | 69 | ==== [['s dissassembly]] ==== 70 | 71 | [ 0] NEWTABLE : 1 4 0 ; 72 | [ 1] LOADK : R[2] K[0] ; load "He" into R[2] 73 | [ 2] LOADK : R[3] K[1] ; load "llo" into R[3] 74 | [ 3] LOADK : R[4] K[2] ; load " " into R[4] 75 | [ 4] LOADK : R[5] K[3] ; load "Wo" into R[5] 76 | [ 5] SETLIST : 1 4 1 ; 77 | [ 6] LOADK : R[2] K[4] ; load "" into R[2] 78 | [ 7] LOADK : R[3] K[5] ; load 1 into R[3] 79 | [ 8] LEN : 4 1 0 ; 80 | [ 9] LOADK : R[5] K[5] ; load 1 into R[5] 81 | [ 10] FORPREP : R[3] 3 ; 82 | [ 11] MOVE : 7 2 0 ; move R[2] into R[7] 83 | [ 12] GETTABLE : R[8] 1 R[6] ; 84 | [ 13] CONCAT : 2 7 8 ; concat 2 values from R[7] to R[8], store into R[2] 85 | [ 14] FORLOOP : R[3] -4 ; 86 | [ 15] GETGLOBAL : R[3] K[6] ; move _G["print"] into R[3] 87 | [ 16] MOVE : 4 2 0 ; move R[2] into R[4] 88 | [ 17] MOVE : 5 0 0 ; move R[0] into R[5] 89 | [ 18] CONCAT : 4 4 5 ; concat 2 values from R[4] to R[5], store into R[4] 90 | [ 19] CALL : 3 2 1 ; 91 | [ 20] RETURN : 0 1 0 ; 92 | 93 | ==== [[example.lua's pseudo-code]] ==== 94 | 95 | local printMsg = function(append) 96 | local tbl = {"He", "llo", " ", "Wo", } 97 | local str = "" 98 | for i = 1, #tbl, 1 do 99 | str = str .. tbl[i] 100 | end 101 | print(str .. append) 102 | end 103 | 104 | printMsg("rld!") 105 | 106 | ``` -------------------------------------------------------------------------------- /lparser.py: -------------------------------------------------------------------------------- 1 | ''' 2 | lparser.py 3 | 4 | Depends on lundump.py for lua dump deserialization. 5 | 6 | An experimental bytecode decompiler. 7 | ''' 8 | 9 | from lundump import Chunk, Constant, Instruction, Opcodes, whichRK, readRKasK 10 | 11 | class _Scope: 12 | def __init__(self, startPC: int, endPC: int): 13 | self.startPC = startPC 14 | self.endPC = endPC 15 | 16 | class _Traceback: 17 | def __init__(self): 18 | self.sets = [] 19 | self.uses = [] 20 | self.isConst = False 21 | 22 | class _Line: 23 | def __init__(self, startPC: int, endPC: int, src: str, scope: int): 24 | self.startPC = startPC 25 | self.endPC = endPC 26 | self.src = src 27 | self.scope = scope 28 | 29 | def isValidLocal(ident: str) -> bool: 30 | # has to start with an alpha or _ 31 | if ident[0] not in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_": 32 | return False 33 | 34 | # then it can be alphanum or _ 35 | for c in ident[1:]: 36 | if c not in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_": 37 | return False 38 | 39 | return True 40 | 41 | class LuaDecomp: 42 | def __init__(self, chunk: Chunk, headChunk: bool = True, scopeOffset: int = 0): 43 | self.chunk = chunk 44 | self.pc = 0 45 | self.scope: list[_Scope] = [] 46 | self.lines: list[_Line] = [] 47 | self.top = {} 48 | self.locals = {} 49 | self.traceback = {} 50 | self.unknownLocalCount = 0 51 | self.headChunk = headChunk 52 | self.scopeOffset = scopeOffset # number of scopes this chunk/proto is in 53 | self.src: str = "" 54 | 55 | # configurations! 56 | self.aggressiveLocals = False # should *EVERY* set register be considered a local? 57 | self.annotateLines = False 58 | self.indexWidth = 4 # how many spaces for indentions? 59 | 60 | self.__loadLocals() 61 | 62 | if not self.headChunk: 63 | functionProto = "function(" 64 | 65 | # define params 66 | for i in range(self.chunk.numParams): 67 | # add param to function prototype (also make a local in the register if it doesn't exist) 68 | functionProto += ("%s, " if i+1 < self.chunk.numParams else "%s") % self.__makeLocalIdentifier(i) 69 | 70 | # mark local as defined 71 | self.__addSetTraceback(i) 72 | functionProto += ")" 73 | 74 | self.__startScope(functionProto, 0, len(self.chunk.instructions)) 75 | 76 | # parse instructions 77 | while self.pc < len(self.chunk.instructions): 78 | self.parseInstr() 79 | self.pc += 1 80 | 81 | # end the scope (if we're supposed too) 82 | self.__checkScope() 83 | 84 | if not self.headChunk: 85 | self.__endScope() 86 | 87 | def getPseudoCode(self) -> str: 88 | fullSrc = "" 89 | 90 | for line in self.lines: 91 | if self.annotateLines: 92 | fullSrc += "-- PC: %d to PC: %d\n" % (line.startPC, line.endPC) 93 | fullSrc += ((' ' * self.indexWidth) * (line.scope + self.scopeOffset)) + line.src + "\n" 94 | 95 | return fullSrc 96 | 97 | # =======================================[[ Helpers ]]========================================= 98 | 99 | def __getInstrAtPC(self, pc: int) -> Instruction: 100 | if pc < len(self.chunk.instructions): 101 | return self.chunk.instructions[pc] 102 | 103 | raise Exception("Decompilation failed!") 104 | 105 | def __getNextInstr(self) -> Instruction: 106 | return self.__getInstrAtPC(self.pc + 1) 107 | 108 | def __getCurrInstr(self) -> Instruction: 109 | return self.__getInstrAtPC(self.pc) 110 | 111 | def __makeTracIfNotExist(self) -> None: 112 | if not self.pc in self.traceback: 113 | self.traceback[self.pc] = _Traceback() 114 | 115 | # when we read from a register, call this 116 | def __addUseTraceback(self, reg: int) -> None: 117 | self.__makeTracIfNotExist() 118 | self.traceback[self.pc].uses.append(reg) 119 | 120 | # when we write from a register, call this 121 | def __addSetTraceback(self, reg: int) -> None: 122 | self.__makeTracIfNotExist() 123 | self.traceback[self.pc].sets.append(reg) 124 | 125 | def __addExpr(self, code: str) -> None: 126 | self.src += code 127 | 128 | def __endStatement(self): 129 | startPC = self.lines[len(self.lines) - 1].endPC + 1 if len(self.lines) > 0 else 0 130 | endPC = self.pc 131 | 132 | # make sure we don't write an empty line 133 | if not self.src == "": 134 | self.lines.append(_Line(startPC, endPC, self.src, len(self.scope))) 135 | self.src = "" 136 | 137 | def __insertStatement(self, pc: int) -> None: 138 | # insert current statement into lines at pc location 139 | for i in range(len(self.lines)): 140 | if self.lines[i].startPC <= pc and self.lines[i].endPC >= pc: 141 | self.lines.insert(i, _Line(pc, pc, self.src, self.lines[i-1].scope if i > 0 else 0)) 142 | self.src = "" 143 | return i 144 | 145 | self.src = "" 146 | 147 | # walks traceback, if local wasn't set before, the local needs to be defined 148 | def __needsDefined(self, reg) -> bool: 149 | for _, trace in self.traceback.items(): 150 | if reg in trace.sets: 151 | return False 152 | 153 | # wasn't set in traceback! needs defined! 154 | return True 155 | 156 | def __loadLocals(self): 157 | for i in range(len(self.chunk.locals)): 158 | name = self.chunk.locals[i].name 159 | if isValidLocal(name): 160 | self.locals[i] = name 161 | elif "(for " not in name: # if it's a for loop register, ignore 162 | self.__makeLocalIdentifier(i) 163 | 164 | # when you *know* the register *has* to be a local (for loops, etc.) 165 | def __getLocal(self, indx: int) -> str: 166 | return self.locals[indx] if indx in self.locals else self.__makeLocalIdentifier(indx) 167 | 168 | def __getReg(self, indx: int) -> str: 169 | self.__addUseTraceback(indx) 170 | 171 | # if the top indx is a local, get it 172 | return self.locals[indx] if indx in self.locals else self.top[indx] 173 | 174 | def __setReg(self, indx: int, code: str, forceLocal: bool = False) -> None: 175 | # if the top indx is a local, set it 176 | if indx in self.locals: 177 | if self.__needsDefined(indx): 178 | self.__newLocal(indx, code) 179 | else: 180 | self.__addExpr(self.locals[indx] + " = " + code) 181 | self.__endStatement() 182 | elif self.aggressiveLocals or forceLocal: # 'every register is a local!!' 183 | self.__newLocal(indx, code) 184 | 185 | self.__addSetTraceback(indx) 186 | self.top[indx] = code 187 | 188 | # ========================================[[ Locals ]]========================================= 189 | 190 | def __makeLocalIdentifier(self, indx: int) -> str: 191 | # first, check if we have a local name already determined 192 | if indx in self.locals: 193 | return self.locals[indx] 194 | 195 | # otherwise, generate a local 196 | self.locals[indx] = "__unknLocal%d" % self.unknownLocalCount 197 | self.unknownLocalCount += 1 198 | 199 | return self.locals[indx] 200 | 201 | def __newLocal(self, indx: int, expr: str) -> None: 202 | self.__makeLocalIdentifier(indx) 203 | 204 | self.__addExpr("local " + self.locals[indx] + " = " + expr) 205 | self.__endStatement() 206 | 207 | # ========================================[[ Scopes ]]========================================= 208 | 209 | def __startScope(self, scopeType: str, start: int, size: int) -> None: 210 | self.__addExpr(scopeType) 211 | self.__endStatement() 212 | self.scope.append(_Scope(start, start + size)) 213 | 214 | # checks if we need to end a scope 215 | def __checkScope(self) -> None: 216 | if len(self.scope) == 0: 217 | return 218 | 219 | if self.pc > self.scope[len(self.scope) - 1].endPC: 220 | self.__endScope() 221 | 222 | def __endScope(self) -> None: 223 | self.__endStatement() 224 | self.__addExpr("end") 225 | self.scope.pop() 226 | 227 | self.__endStatement() 228 | 229 | # =====================================[[ Instructions ]]====================================== 230 | 231 | def __emitOperand(self, a: int, b: str, c: str, op: str) -> None: 232 | self.__setReg(a, "(" + b + op + c + ")") 233 | 234 | # handles conditional jumps 235 | def __condJmp(self, op: str, rkBC: bool = True): 236 | instr = self.__getCurrInstr() 237 | jmpType = "if" 238 | scopeStart = "then" 239 | 240 | # we need to check if the jmp location has a jump back (if so, it's a while loop) 241 | jmp = self.__getNextInstr().B + 1 242 | jmpToInstr = self.__getInstrAtPC(self.pc + jmp) 243 | 244 | if jmpToInstr.opcode == Opcodes.JMP: 245 | # if this jump jumps back to this compJmp, it's a loop! 246 | if self.pc + jmp + jmpToInstr.B <= self.pc + 1: 247 | jmpType = "while" 248 | scopeStart = "do" 249 | elif jmp < 0: 250 | # 'repeat until' loop (probably) 251 | jmpType = "until" 252 | scopeStart = None 253 | 254 | if instr.A > 0: 255 | self.__addExpr("%s not " % jmpType) 256 | else: 257 | self.__addExpr("%s " % jmpType) 258 | 259 | # write actual comparison 260 | if rkBC: 261 | self.__addExpr(self.__readRK(instr.B) + op + self.__readRK(instr.C) + " ") 262 | else: # just testing rkB 263 | self.__addExpr(op + self.__readRK(instr.B)) 264 | 265 | self.pc += 1 # skip next instr 266 | if scopeStart: 267 | self.__startScope("%s " % scopeStart, self.pc - 1, jmp) 268 | 269 | # we end the statement *after* scopeStart 270 | self.__endStatement() 271 | else: 272 | # end the statement prior to repeat 273 | self.__endStatement() 274 | 275 | # it's a repeat until loop, insert 'repeat' at the jumpTo location 276 | self.__addExpr("repeat") 277 | insertedLine = self.__insertStatement(self.pc + jmp) 278 | 279 | # add scope to every line in-between 280 | for i in range(insertedLine+1, len(self.lines)-1): 281 | self.lines[i].scope += 1 282 | 283 | # 'RK's are special in because can be a register or a konstant. a bitflag is read to determine which 284 | def __readRK(self, rk: int) -> str: 285 | if (whichRK(rk)) > 0: 286 | return self.chunk.getConstant(readRKasK(rk)).toCode() 287 | else: 288 | return self.__getReg(rk) 289 | 290 | # walk & peak ahead NEWTABLE 291 | def __parseNewTable(self, indx: int): 292 | # TODO: parse SETTABLE too? 293 | tblOps = [Opcodes.LOADK, Opcodes.SETLIST] 294 | 295 | instr = self.__getNextInstr() 296 | cachedRegs = {} 297 | tbl = "{" 298 | while instr.opcode in tblOps: 299 | if instr.opcode == Opcodes.LOADK: # operate on registers 300 | cachedRegs[instr.A] = self.chunk.getConstant(instr.B).toCode() 301 | elif instr.opcode == Opcodes.SETLIST: 302 | numElems = instr.B 303 | 304 | for i in range(numElems): 305 | tbl += "%s, " % cachedRegs[instr.A + i + 1] 306 | del cachedRegs[instr.A + i + 1] 307 | 308 | self.pc += 1 309 | instr = self.__getNextInstr() 310 | tbl += "}" 311 | 312 | # i use forceLocal here even though i don't know *for sure* that the register is a local. 313 | # this does help later though if the table is reused (which is 99% of the time). the other 1% 314 | # only affects syntax and may look a little weird but is fine and equivalent non-the-less 315 | self.__setReg(indx, tbl, forceLocal=True) 316 | self.__endStatement() 317 | 318 | # if we have leftovers... oops, set those 319 | for i, v in cachedRegs.items(): 320 | self.__setReg(i, v) 321 | 322 | def parseInstr(self): 323 | instr = self.__getCurrInstr() 324 | 325 | match instr.opcode: 326 | case Opcodes.MOVE: # move is a fake ABC instr, C is ignored 327 | # move registers 328 | self.__setReg(instr.A, self.__getReg(instr.B)) 329 | case Opcodes.LOADK: 330 | self.__setReg(instr.A, self.chunk.getConstant(instr.B).toCode()) 331 | case Opcodes.LOADBOOL: 332 | if instr.B == 0: 333 | self.__setReg(instr.A, "false") 334 | else: 335 | self.__setReg(instr.A, "true") 336 | case Opcodes.GETGLOBAL: 337 | self.__setReg(instr.A, self.chunk.getConstant(instr.B).data) 338 | case Opcodes.GETTABLE: 339 | self.__setReg(instr.A, self.__getReg(instr.B) + "[" + self.__readRK(instr.C) + "]") 340 | case Opcodes.SETGLOBAL: 341 | self.__addExpr(self.chunk.getConstant(instr.B).data + " = " + self.__getReg(instr.A)) 342 | self.__endStatement() 343 | case Opcodes.SETTABLE: 344 | self.__addExpr(self.__getReg(instr.A) + "[" + self.__readRK(instr.B) + "] = " + self.__readRK(instr.C)) 345 | self.__endStatement() 346 | case Opcodes.NEWTABLE: 347 | self.__parseNewTable(instr.A) 348 | case Opcodes.ADD: 349 | self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " + ") 350 | case Opcodes.SUB: 351 | self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " - ") 352 | case Opcodes.MUL: 353 | self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " * ") 354 | case Opcodes.DIV: 355 | self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " / ") 356 | case Opcodes.MOD: 357 | self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " % ") 358 | case Opcodes.POW: 359 | self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " ^ ") 360 | case Opcodes.UNM: 361 | self.__setReg(instr.A, "-" + self.__getReg(instr.B)) 362 | case Opcodes.NOT: 363 | self.__setReg(instr.A, "not " + self.__getReg(instr.B)) 364 | case Opcodes.LEN: 365 | self.__setReg(instr.A, "#" + self.__getReg(instr.B)) 366 | case Opcodes.CONCAT: 367 | count = instr.C-instr.B+1 368 | concatStr = "" 369 | 370 | # concat all items on stack from RC to RB 371 | for i in range(count): 372 | concatStr += self.__getReg(instr.B + i) + (" .. " if not i == count - 1 else "") 373 | 374 | self.__setReg(instr.A, concatStr) 375 | case Opcodes.JMP: 376 | pass 377 | case Opcodes.EQ: 378 | self.__condJmp(" == ") 379 | case Opcodes.LT: 380 | self.__condJmp(" < ") 381 | case Opcodes.LE: 382 | self.__condJmp(" <= ") 383 | case Opcodes.TEST: 384 | if instr.C == 0: 385 | self.__condJmp("", False) 386 | else: 387 | self.__condJmp("not ", False) 388 | case Opcodes.CALL: 389 | preStr = "" 390 | callStr = "" 391 | ident = "" 392 | 393 | # parse arguments 394 | callStr += self.__getReg(instr.A) + "(" 395 | for i in range(instr.A + 1, instr.A + instr.B): 396 | callStr += self.__getReg(i) + (", " if not i + 1 == instr.A + instr.B else "") 397 | callStr += ")" 398 | 399 | # parse return values 400 | if instr.C > 1: 401 | preStr = "local " 402 | for indx in range(instr.A, instr.A + instr.C - 1): 403 | if indx in self.locals: 404 | ident = self.locals[indx] 405 | else: 406 | ident = self.__makeLocalIdentifier(indx) 407 | preStr += ident 408 | 409 | # normally setReg() does this 410 | self.top[indx] = ident 411 | 412 | # just so we don't have a trailing ', ' 413 | preStr += ", " if not indx == instr.A + instr.C - 2 else "" 414 | preStr += " = " 415 | 416 | self.__addExpr(preStr + callStr) 417 | self.__endStatement() 418 | case Opcodes.RETURN: 419 | self.__endStatement() 420 | pass # no-op for now 421 | case Opcodes.FORLOOP: 422 | pass # no-op for now 423 | case Opcodes.FORPREP: 424 | self.__addExpr("for %s = %s, %s, %s " % (self.__getLocal(instr.A+3), self.__getReg(instr.A), self.__getReg(instr.A + 1), self.__getReg(instr.A + 2))) 425 | self.__startScope("do", self.pc, instr.B) 426 | case Opcodes.SETLIST: 427 | # LFIELDS_PER_FLUSH (50) is the number of elements that *should* have been set in the list in the *last* SETLIST 428 | # eg. 429 | # [ 49] LOADK : R[49] K[1] ; load 0.0 into R[49] 430 | # [ 50] LOADK : R[50] K[1] ; load 0.0 into R[50] 431 | # [ 51] SETLIST : 0 50 1 ; sets list[1..50] 432 | # [ 52] LOADK : R[1] K[1] ; load 0.0 into R[1] 433 | # [ 53] SETLIST : 0 1 2 ; sets list[51..51] 434 | numElems = instr.B 435 | startAt = ((instr.C - 1) * 50) 436 | ident = self.__getLocal(instr.A) 437 | 438 | # set each index (TODO: make tables less verbose) 439 | for i in range(numElems): 440 | self.__addExpr("%s[%d] = %s" % (ident, (startAt + i + 1), self.__getReg(instr.A + i + 1))) 441 | self.__endStatement() 442 | case Opcodes.CLOSURE: 443 | proto = LuaDecomp(self.chunk.protos[instr.B], headChunk=False, scopeOffset=len(self.scope)) 444 | self.__setReg(instr.A, proto.getPseudoCode()) 445 | case _: 446 | raise Exception("unsupported instruction: %s" % instr.toString()) -------------------------------------------------------------------------------- /lundump.py: -------------------------------------------------------------------------------- 1 | ''' 2 | l(un)dump.py 3 | 4 | A Lua5.1 cross-platform bytecode deserializer && serializer. This module pulls int and size_t sizes from the 5 | chunk header, meaning it should be able to deserialize lua bytecode dumps from most platforms, 6 | regardless of the host machine. 7 | 8 | For details on the Lua5.1 bytecode format, I read [this PDF](https://archive.org/download/a-no-frills-intro-to-lua-5.1-vm-instructions/a-no-frills-intro-to-lua-5.1-vm-instructions_archive.torrent) 9 | as well as read the lundump.c source file from the Lua5.1 source. 10 | ''' 11 | 12 | import struct 13 | import array 14 | from enum import IntEnum, Enum, auto 15 | 16 | class InstructionType(Enum): 17 | ABC = auto(), 18 | ABx = auto(), 19 | AsBx = auto() 20 | 21 | class Opcodes(IntEnum): 22 | MOVE = 0, 23 | LOADK = 1, 24 | LOADBOOL = 2, 25 | LOADNIL = 3, 26 | GETUPVAL = 4, 27 | GETGLOBAL = 5, 28 | GETTABLE = 6, 29 | SETGLOBAL = 7, 30 | SETUPVAL = 8, 31 | SETTABLE = 9, 32 | NEWTABLE = 10, 33 | SELF = 11, 34 | ADD = 12, 35 | SUB = 13, 36 | MUL = 14, 37 | DIV = 15, 38 | MOD = 16, 39 | POW = 17, 40 | UNM = 18, 41 | NOT = 19, 42 | LEN = 20, 43 | CONCAT = 21, 44 | JMP = 22, 45 | EQ = 23, 46 | LT = 24, 47 | LE = 25, 48 | TEST = 26, 49 | TESTSET = 27, 50 | CALL = 28, 51 | TAILCALL = 29, 52 | RETURN = 30, 53 | FORLOOP = 31, 54 | FORPREP = 32, 55 | TFORLOOP = 33, 56 | SETLIST = 34, 57 | CLOSE = 35, 58 | CLOSURE = 36, 59 | VARARG = 37 60 | 61 | class ConstType(IntEnum): 62 | NIL = 0, 63 | BOOL = 1, 64 | NUMBER = 3, 65 | STRING = 4, 66 | 67 | _RKBCInstr = [Opcodes.SETTABLE, Opcodes.ADD, Opcodes.SUB, Opcodes.MUL, Opcodes.DIV, Opcodes.MOD, Opcodes.POW, Opcodes.EQ, Opcodes.LT] 68 | _RKCInstr = [Opcodes.GETTABLE, Opcodes.SELF] 69 | _KBx = [Opcodes.LOADK, Opcodes.GETGLOBAL, Opcodes.SETGLOBAL] 70 | 71 | _LUAMAGIC = b'\x1bLua' 72 | 73 | # is an 'RK' value a K? (result is true for K, false for R) 74 | def whichRK(rk: int): 75 | return (rk & (1 << 8)) > 0 76 | 77 | # read an RK as a K 78 | def readRKasK(rk: int): 79 | return (rk & ~(1 << 8)) 80 | 81 | class Instruction: 82 | def __init__(self, type: InstructionType, name: str) -> None: 83 | self.type = type 84 | self.name = name 85 | self.opcode: int = None 86 | self.A: int = None 87 | self.B: int = None 88 | self.C: int = None 89 | 90 | # 'RK's are special in because can be a register or a konstant. a bitflag is read to determine which 91 | def __formatRK(self, rk: int) -> str: 92 | if whichRK(rk): 93 | return "K[" + str(readRKasK(rk)) + "]" 94 | else: 95 | return "R[" + str(rk) + "]" 96 | 97 | def toString(self): 98 | instr = "%10s" % self.name 99 | regs = "" 100 | 101 | if self.type == InstructionType.ABC: 102 | # by default, treat them as registers 103 | A = "%d" % self.A 104 | B = "%d" % self.B 105 | C = "%d" % self.C 106 | 107 | # these opcodes have RKs for B & C 108 | if self.opcode in _RKBCInstr: 109 | A = "R[%d]" % self.A 110 | B = self.__formatRK(self.B) 111 | C = self.__formatRK(self.C) 112 | elif self.opcode in _RKCInstr: # just for C 113 | A = "R[%d]" % self.A 114 | C = self.__formatRK(self.C) 115 | 116 | regs = "%6s %6s %6s" % (A, B, C) 117 | elif self.type == InstructionType.ABx or self.type == InstructionType.AsBx: 118 | A = "R[%d]" % self.A 119 | B = "%d" % self.B 120 | 121 | if self.opcode in _KBx: 122 | B = "K[%d]" % self.B 123 | 124 | regs = "%6s %6s" % (A, B) 125 | 126 | return "%s : %s" % (instr, regs) 127 | 128 | def getAnnotation(self, chunk): 129 | if self.opcode == Opcodes.MOVE: 130 | return "move R[%d] into R[%d]" % (self.B, self.A) 131 | elif self.opcode == Opcodes.LOADK: 132 | return "load %s into R[%d]" % (chunk.getConstant(self.B).toCode(), self.A) 133 | elif self.opcode == Opcodes.GETGLOBAL: 134 | return 'move _G[%s] into R[%d]' % (chunk.getConstant(self.B).toCode(), self.A) 135 | elif self.opcode == Opcodes.ADD: 136 | return 'add %s to %s, place into R[%d]' % (self.__formatRK(self.C), self.__formatRK(self.B), self.A) 137 | elif self.opcode == Opcodes.SUB: 138 | return 'sub %s from %s, place into R[%d]' % (self.__formatRK(self.C), self.__formatRK(self.B), self.A) 139 | elif self.opcode == Opcodes.MUL: 140 | return 'mul %s to %s, place into R[%d]' % (self.__formatRK(self.C), self.__formatRK(self.B), self.A) 141 | elif self.opcode == Opcodes.DIV: 142 | return 'div %s from %s, place into R[%d]' % (self.__formatRK(self.C), self.__formatRK(self.B), self.A) 143 | elif self.opcode == Opcodes.CONCAT: 144 | count = self.C - self.B + 1 145 | return "concat %d values from R[%d] to R[%d], store into R[%d]" % (count, self.B, self.C, self.A) 146 | else: 147 | return "" 148 | 149 | class Constant: 150 | def __init__(self, type: ConstType, data) -> None: 151 | self.type = type 152 | self.data = data 153 | 154 | def toString(self): 155 | return "[%s] %s" % (self.type.name, str(self.data)) 156 | 157 | # format the constant so that it is parsable by lua 158 | def toCode(self): 159 | if self.type == ConstType.STRING: 160 | return "\"" + self.data + "\"" 161 | elif self.type == ConstType.BOOL: 162 | if self.data: 163 | return "true" 164 | else: 165 | return "false" 166 | elif self.type == ConstType.NUMBER: 167 | return "%g" % self.data 168 | else: 169 | return "nil" 170 | 171 | class Local: 172 | def __init__(self, name: str, start: int, end: int): 173 | self.name = name 174 | self.start = start 175 | self.end = end 176 | 177 | class Chunk: 178 | def __init__(self) -> None: 179 | self.constants: list[Constant] = [] 180 | self.instructions: list[Instruction] = [] 181 | self.protos: list[Chunk] = [] 182 | 183 | self.name: str = "Unnamed proto" 184 | self.frst_line: int = 0 185 | self.last_line: int = 0 186 | self.numUpvals: int = 0 187 | self.numParams: int = 0 188 | self.isVarg: bool = False 189 | self.maxStack: int = 0 190 | 191 | self.upvalues: list[str] = [] 192 | self.lineNums: list[int] = [] 193 | self.locals: list[Local] = [] 194 | 195 | def appendInstruction(self, instr: Instruction): 196 | self.instructions.append(instr) 197 | 198 | def appendConstant(self, const: Constant): 199 | self.constants.append(const) 200 | 201 | def appendProto(self, proto): 202 | self.protos.append(proto) 203 | 204 | def appendLine(self, line: int): 205 | self.lineNums.append(line) 206 | 207 | def appendLocal(self, local: Local): 208 | self.locals.append(local) 209 | 210 | def appendUpval(self, upval: str): 211 | self.upvalues.append(upval) 212 | 213 | def findLocal(self, pc: int) -> Local: 214 | for l in self.locals: 215 | if l.start <= pc and l.end >= pc: 216 | return l 217 | 218 | # there's no local information (may have been stripped) 219 | return None 220 | 221 | def getConstant(self, indx: int) -> Constant: 222 | return self.constants[indx] 223 | 224 | def print(self): 225 | print("\n==== [[" + str(self.name) + "'s constants]] ====\n") 226 | for i in range(len(self.constants)): 227 | print("%d: %s" % (i, self.constants[i].toString())) 228 | 229 | print("\n==== [[" + str(self.name) + "'s locals]] ====\n") 230 | for i in range(len(self.locals)): 231 | print("R[%d]: %s" % (i, self.locals[i].name)) 232 | 233 | print("\n==== [[" + str(self.name) + "'s dissassembly]] ====\n") 234 | for i in range(len(self.instructions)): 235 | print("[%3d] %-40s ; %s" % (i, self.instructions[i].toString(), self.instructions[i].getAnnotation(self))) 236 | 237 | if len(self.protos) > 0: 238 | print("\n==== [[" + str(self.name) + "'s protos]] ====\n") 239 | for z in self.protos: 240 | z.print() 241 | 242 | instr_lookup_tbl = [ 243 | Instruction(InstructionType.ABC, "MOVE"), Instruction(InstructionType.ABx, "LOADK"), Instruction(InstructionType.ABC, "LOADBOOL"), 244 | Instruction(InstructionType.ABC, "LOADNIL"), Instruction(InstructionType.ABC, "GETUPVAL"), Instruction(InstructionType.ABx, "GETGLOBAL"), 245 | Instruction(InstructionType.ABC, "GETTABLE"), Instruction(InstructionType.ABx, "SETGLOBAL"), Instruction(InstructionType.ABC, "SETUPVAL"), 246 | Instruction(InstructionType.ABC, "SETTABLE"), Instruction(InstructionType.ABC, "NEWTABLE"), Instruction(InstructionType.ABC, "SELF"), 247 | Instruction(InstructionType.ABC, "ADD"), Instruction(InstructionType.ABC, "SUB"), Instruction(InstructionType.ABC, "MUL"), 248 | Instruction(InstructionType.ABC, "DIV"), Instruction(InstructionType.ABC, "MOD"), Instruction(InstructionType.ABC, "POW"), 249 | Instruction(InstructionType.ABC, "UNM"), Instruction(InstructionType.ABC, "NOT"), Instruction(InstructionType.ABC, "LEN"), 250 | Instruction(InstructionType.ABC, "CONCAT"), Instruction(InstructionType.AsBx, "JMP"), Instruction(InstructionType.ABC, "EQ"), 251 | Instruction(InstructionType.ABC, "LT"), Instruction(InstructionType.ABC, "LE"), Instruction(InstructionType.ABC, "TEST"), 252 | Instruction(InstructionType.ABC, "TESTSET"), Instruction(InstructionType.ABC, "CALL"), Instruction(InstructionType.ABC, "TAILCALL"), 253 | Instruction(InstructionType.ABC, "RETURN"), Instruction(InstructionType.AsBx, "FORLOOP"), Instruction(InstructionType.AsBx, "FORPREP"), 254 | Instruction(InstructionType.ABC, "TFORLOOP"), Instruction(InstructionType.ABC, "SETLIST"), Instruction(InstructionType.ABC, "CLOSE"), 255 | Instruction(InstructionType.ABx, "CLOSURE"), Instruction(InstructionType.ABC, "VARARG") 256 | ] 257 | 258 | # at [p]osition, with [s]ize of bits 259 | def get_bits(num: int, p: int, s: int): 260 | return (num>>p) & (~((~0)< int: 264 | return (num & (~((~((~0)< Instruction: 267 | opcode = get_bits(data, 0, 6) 268 | template = instr_lookup_tbl[opcode] 269 | instr = Instruction(template.type, template.name) 270 | 271 | # i read the lopcodes.h file to get these bit position and sizes. 272 | instr.opcode = opcode 273 | instr.A = get_bits(data, 6, 8) # starts after POS_OP + SIZE_OP (6), with a size of 8 274 | 275 | if instr.type == InstructionType.ABC: 276 | instr.B = get_bits(data, 23, 9) # starts after POS_C + SIZE_C (23), with a size of 9 277 | instr.C = get_bits(data, 14, 9) # starts after POS_A + SIZE_A (14), with a size of 9 278 | elif instr.type == InstructionType.ABx: 279 | instr.B = get_bits(data, 14, 18) # starts after POS_A + SIZE_A (14), with a size of 18 280 | elif instr.type == InstructionType.AsBx: 281 | instr.B = get_bits(data, 14, 18) - 131071 # Bx is now signed, so just sub half of the MAX_UINT for 18 bits 282 | 283 | return instr 284 | 285 | # returns a u32 instruction 286 | def _encode_instr(instr: Instruction) -> int: 287 | data = 0 288 | 289 | # encode instruction (basically, do the inverse of _decode_instr) 290 | data = set_bits(data, instr.opcode, 0, 6) 291 | data = set_bits(data, instr.A, 6, 8) 292 | 293 | if instr.type == InstructionType.ABC: 294 | data = set_bits(data, instr.B, 23, 9) 295 | data = set_bits(data, instr.C, 14, 9) 296 | elif instr.type == InstructionType.ABx: 297 | data = set_bits(data, instr.B, 14, 18) 298 | elif instr.type == InstructionType.AsBx: 299 | data = set_bits(data, instr.B + 131071, 14, 18) 300 | 301 | return data 302 | 303 | class LuaUndump: 304 | def __init__(self): 305 | self.rootChunk: Chunk = None 306 | self.index = 0 307 | 308 | def _loadBlock(self, sz) -> bytearray: 309 | if self.index + sz > len(self.bytecode): 310 | raise Exception("Malformed bytecode!") 311 | 312 | temp = bytearray(self.bytecode[self.index:self.index+sz]) 313 | self.index = self.index + sz 314 | return temp 315 | 316 | def _get_byte(self) -> int: 317 | return self._loadBlock(1)[0] 318 | 319 | def _get_uint32(self) -> int: 320 | order = 'big' if self.big_endian else 'little' 321 | return int.from_bytes(self._loadBlock(4), byteorder=order, signed=False) 322 | 323 | def _get_uint(self) -> int: 324 | order = 'big' if self.big_endian else 'little' 325 | return int.from_bytes(self._loadBlock(self.int_size), byteorder=order, signed=False) 326 | 327 | def _get_size_t(self) -> int: 328 | order = 'big' if self.big_endian else 'little' 329 | return int.from_bytes(self._loadBlock(self.size_t), byteorder=order, signed=False) 330 | 331 | def _get_double(self) -> int: 332 | order = '>d' if self.big_endian else ' str: 336 | size = self._get_size_t() 337 | if (size == 0): 338 | return "" 339 | 340 | # [:-1] to remove the NULL terminator 341 | return ("".join(chr(x) for x in self._loadBlock(size)))[:-1] 342 | 343 | def decode_chunk(self) -> Chunk: 344 | chunk = Chunk() 345 | 346 | # chunk meta info 347 | chunk.name = self._get_string() 348 | chunk.frst_line = self._get_uint() 349 | chunk.last_line = self._get_uint() 350 | chunk.numUpvals = self._get_byte() 351 | chunk.numParams = self._get_byte() 352 | chunk.isVarg = (self._get_byte() != 0) 353 | chunk.maxStack = self._get_byte() 354 | 355 | # parse instructions 356 | num = self._get_uint() 357 | for i in range(num): 358 | chunk.appendInstruction(_decode_instr(self._get_uint32())) 359 | 360 | # get constants 361 | num = self._get_uint() 362 | for i in range(num): 363 | constant: Constant = None 364 | type = self._get_byte() 365 | 366 | if type == 0: # nil 367 | constant = Constant(ConstType.NIL, None) 368 | elif type == 1: # bool 369 | constant = Constant(ConstType.BOOL, (self._get_byte() != 0)) 370 | elif type == 3: # number 371 | constant = Constant(ConstType.NUMBER, self._get_double()) 372 | elif type == 4: # string 373 | constant = Constant(ConstType.STRING, self._get_string()) 374 | else: 375 | raise Exception("Unknown Datatype! [%d]" % type) 376 | 377 | chunk.appendConstant(constant) 378 | 379 | # parse protos 380 | num = self._get_uint() 381 | for i in range(num): 382 | chunk.appendProto(self.decode_chunk()) 383 | 384 | # debug stuff, maybe i'll add this to chunks to have better disassembly annotation in the future? 385 | # eh, for now just consume the bytes. 386 | 387 | # line numbers 388 | num = self._get_uint() 389 | for i in range(num): 390 | self._get_uint() 391 | 392 | # locals 393 | num = self._get_uint() 394 | for i in range(num): 395 | name = self._get_string() # local name 396 | start = self._get_uint() # local start PC 397 | end = self._get_uint() # local end PC 398 | chunk.appendLocal(Local(name, start, end)) 399 | 400 | # upvalues 401 | num = self._get_uint() 402 | for i in range(num): 403 | chunk.appendUpval(self._get_string()) # upvalue name 404 | 405 | return chunk 406 | 407 | def decode_rawbytecode(self, rawbytecode): 408 | # bytecode sanity checks 409 | if not rawbytecode[0:4] == _LUAMAGIC: 410 | raise Exception("Lua Bytecode expected!") 411 | 412 | bytecode = array.array('b', rawbytecode) 413 | return self.decode_bytecode(bytecode) 414 | 415 | def decode_bytecode(self, bytecode): 416 | self.bytecode = bytecode 417 | 418 | # aligns index, skips header 419 | self.index = 4 420 | 421 | self.vm_version = self._get_byte() 422 | self.bytecode_format = self._get_byte() 423 | self.big_endian = (self._get_byte() == 0) 424 | self.int_size = self._get_byte() 425 | self.size_t = self._get_byte() 426 | self.instr_size = self._get_byte() # gets size of instructions 427 | self.l_number_size = self._get_byte() # size of lua_Number 428 | self.integral_flag = self._get_byte() # is lua_Number defined as an int? false = float/double, true = int/long/short/etc. 429 | 430 | self.rootChunk = self.decode_chunk() 431 | return self.rootChunk 432 | 433 | def loadFile(self, luaCFile): 434 | with open(luaCFile, 'rb') as luac_file: 435 | bytecode = luac_file.read() 436 | return self.decode_rawbytecode(bytecode) 437 | 438 | def print_dissassembly(self): 439 | self.rootChunk.print() 440 | 441 | class LuaDump: 442 | def __init__(self, rootChunk: Chunk): 443 | self.rootChunk = rootChunk 444 | self.bytecode = bytearray() 445 | 446 | # header info 447 | self.vm_version = 0x51 448 | self.bytecode_format = 0x00 449 | self.big_endian = False 450 | 451 | # data sizes 452 | self.int_size = 4 453 | self.size_t = 8 454 | self.instr_size = 4 455 | self.l_number_size = 8 456 | self.integral_flag = False # lua_Number is a double 457 | 458 | def _writeBlock(self, data: bytes): 459 | self.bytecode += bytearray(data) 460 | 461 | def _set_byte(self, b: int): 462 | self.bytecode.append(b) 463 | 464 | def _set_uint32(self, i: int): 465 | order = 'big' if self.big_endian else 'little' 466 | self._writeBlock(i.to_bytes(4, order, signed=False)) 467 | 468 | def _set_uint(self, i: int): 469 | order = 'big' if self.big_endian else 'little' 470 | self._writeBlock(i.to_bytes(self.int_size, order, signed=False)) 471 | 472 | def _set_size_t(self, i: int): 473 | order = 'big' if self.big_endian else 'little' 474 | self._writeBlock(i.to_bytes(self.size_t, order, signed=False)) 475 | 476 | def _set_double(self, f: float): 477 | order = '>d' if self.big_endian else ' bytearray: 554 | self._dumpHeader() 555 | self._dumpChunk(self.rootChunk) 556 | 557 | return self.bytecode -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | import lundump 4 | import lparser 5 | 6 | lc = lundump.LuaUndump() 7 | print(sys.argv[1]) 8 | chunk = lc.loadFile(sys.argv[1]) 9 | 10 | lc.print_dissassembly() 11 | 12 | lp = lparser.LuaDecomp(chunk) 13 | 14 | print("\n==== [[" + str(chunk.name) + "'s pseudo-code]] ====\n") 15 | print(lp.getPseudoCode()) --------------------------------------------------------------------------------