├── README.md
├── check_indent.lua
├── dumpfunc.lua
├── dumpindent.lua
├── genlfunc.lua
├── lexer.lua
├── parser.lua
├── serialize.lua
├── test.lua
└── utils.lua
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# dump_lua_function
--------------------------------------------------------------------------------
/check_indent.lua:
--------------------------------------------------------------------------------
local parser = require "parser"
local M = {}

-- Record `location` into ret.tab / ret.space depending on which
-- whitespace characters the indent string contains (a mixed indent
-- is recorded in both lists).
local function do_check_indent(indent_str, location, ret)
    assert(#indent_str > 0)
    if string.find(indent_str, "\t") then
        table.insert(ret.tab, location)
    end
    if string.find(indent_str, " ") then
        table.insert(ret.space, location)
    end
end

-- Walk the AST depth-first. For the first node seen on a new source line,
-- slice the leading whitespace out of `source` (column-1 bytes before the
-- node's offset) and classify it.
-- NOTE(review): `last_line` is passed by value, so updates made while
-- visiting one subtree are not seen by later siblings; a line can
-- therefore be classified more than once — confirm this is intended.
local function resolve_node(source, node, last_line, ret)
    local location = node.location
    local line = location and location.line
    if line and line ~= last_line then
        local offset = location.offset
        local indent_len = location.column - 1
        local indent_str = string.sub(source, offset - indent_len, offset - 1)
        if #indent_str > 0 and string.match(indent_str, "^%s+$") then
            do_check_indent(indent_str, location, ret)
        end
        last_line = location.line
    end

    for _, v in ipairs(node) do
        if type(v) == "table" then
            resolve_node(source, v, last_line, ret)
        end
    end
end

-- Read a whole file as a binary string; raises (with the path) if the
-- file cannot be opened.
local function readfile(file_path)
    local fd = assert(io.open(file_path, "rb"), file_path)
    local s = fd:read("a")
    fd:close()
    return s
end

-- Check one Lua source file for inconsistent indentation.
-- Returns the list of locations that use the *minority* indent
-- character, plus the name of that character ("tab" or "space").
function M.check_indent(file_path)
    local source = readfile(file_path)
    local ret = {
        tab = {},
        space = {},
    }
    local ast = parser(source)
    resolve_node(source, ast, nil, ret)
    if #ret.tab <= #ret.space then
        return ret.tab, "tab"
    else
        return ret.space, "space"
    end
end

return M
--------------------------------------------------------------------------------
/dumpfunc.lua:
--------------------------------------------------------------------------------
local parser = require "genlfunc"
local seri = require "serialize"
local lfs = require "lfs"
local utils = require "utils"
local list_all_lua = utils.list_all_lua
local join = utils.join

-- Convert the list produced by genlfunc into a map of
-- { [definition_start_line] = function_name }, keeping the first
-- name seen for each start line.
local function reshape_func(info)
    local ret = {}
    for _, v in ipairs(info) do
        local def = v.range[1]
        if not ret[def] then
            ret[def] = v.name
        end
    end
    return ret
end

-- Read a whole file as text; raises (with the path) if it cannot be opened.
local function read_file(path)
    local fd = assert(io.open(path, "r"), path)
    local s = fd:read("a")
    fd:close()
    return s
end

-- Write `source` to `path` as a loadable chunk ("return <source>");
-- raises (with the path) if the output file cannot be opened.
local function write_file(source, path)
    local fd = assert(io.open(path, "w"), path)
    fd:write("return ")
    fd:write(source)
    fd:close()
end

local lua_dir, out_lua = ...

-- Scan every Lua file under lua_dir and serialize the per-file
-- function-name tables into out_lua.
local function main()
    local ret = list_all_lua(lua_dir)
    local func_ret = {}
    for path in pairs(ret) do
        local full_path = join(lua_dir, path)
        local source = read_file(full_path)
        local info = parser(source)
        func_ret[path] = reshape_func(info)
    end
    local s = seri(func_ret)
    write_file(s, out_lua)
    print("dump function success to " .. out_lua)
end

main()
--------------------------------------------------------------------------------
/dumpindent.lua:
--------------------------------------------------------------------------------
local ci = require "check_indent"
local lfs = require "lfs"
local utils = require "utils"

local lua_dir = ...

-- Report every inconsistently-indented line (minority indent character)
-- found in the Lua files under lua_dir, numbered sequentially.
local function main()
    local ret = utils.list_all_lua(lua_dir)
    local count = 1
    for path in pairs(ret) do
        local full_path = utils.join(lua_dir, path)
        local result, char = ci.check_indent(full_path)
        for _, location in ipairs(result) do
            print(string.format("[%s] %s:%s inconsistent indent (%s).",
                count, path, location.line, char))
            count = count + 1
        end
    end
end

main()
--------------------------------------------------------------------------------
/genlfunc.lua:
--------------------------------------------------------------------------------
local parser = require "parser"

-- Append one { name = ..., range = {start_line, end_line} } entry to ret.
local function append(ret, name, location, end_location)
    ret[#ret+1] = {
        name = name,
        range = {location.line, end_location.line},
    }
end

-- `local a, b = function() end, ...` style definitions: pair each
-- Function value on the right-hand side with the matching name on
-- the left-hand side.
local function dump_local_function(root, ret)
    local left = root[1]
    local right = root[2]
    if right then
        for i, v in ipairs(right) do
            if v.tag == "Function" then
                local name = left[i] and left[i][1]
                if name then
                    append(ret, name, v.location, v.end_location)
                end
            end
        end
    end
end

-- `local function name() end` definitions (Localrec node).
local function dump_localrec_function(root, ret)
    local location = root.location
    local end_location = root[2].end_location
    local name = root[1][1]
    append(ret, name, location, end_location)
end

-- Flatten an Id/Index node chain into a dotted name, e.g. "a.b.c".
local function dump_index(root)
    if root.tag == "Id" then
        return root[1]
    end
    assert(root.tag == "Index")
    local ret = {}
    for _, v in ipairs(root) do
        local tag = v.tag
        if tag == "Id" or tag == "String" then
            ret[#ret+1] = v[1]
        elseif tag == "Index" then
            ret[#ret+1] = dump_index(v)
        end
    end
    return table.concat(ret, ".")
end

-- `function a.b.c() end` definitions: a Set node whose first token is
-- the `function` keyword.
local function dump_set_function(root, ret)
    local name = dump_index(root[1][1])
    local location = root[2][1].location
    local end_location = root[2][1].end_location
    append(ret, name, location, end_location)
end

-- Walk the AST and collect function definitions from the statement
-- lists of Block nodes.
-- NOTE(review): the original comment said "只遍历最外层的function"
-- (only walk the outermost functions), but the recursion below also
-- descends into nested blocks — confirm which behavior is intended.
local function dump_block(root, ret)
    ret = ret or {}
    local root_tag = root.tag
    for _, v in ipairs(root) do
        if root_tag == "Block" then
            local tag = v.tag
            if tag == "Set" and v.first_token == "function" then
                dump_set_function(v, ret)
            elseif tag == "Localrec" then
                dump_localrec_function(v, ret)
            elseif tag == "Local" then
                dump_local_function(v, ret)
            end
        end
        -- Only recurse into child nodes; array slots may also hold
        -- plain values (e.g. the string payload of an Id node).
        if type(v) == "table" then
            dump_block(v, ret)
        end
    end
    return ret
end

-- Parse `source` and return the list of function name/line-range entries.
local function do_parser(source)
    local ast = parser(source)
    return dump_block(ast, {})
end

return do_parser
--------------------------------------------------------------------------------
/lexer.lua:
--------------------------------------------------------------------------------
-- Lexer should support syntax of Lua 5.1, Lua 5.2, Lua 5.3 and LuaJIT(64bit and complex cdata literals).
local lexer = {}

local sbyte = string.byte
local ssub = string.sub
local schar = string.char
local sreverse = string.reverse
local tconcat = table.concat
local mfloor = math.floor

-- No point in inlining these, fetching a constant ~= fetching a local.
12 | local BYTE_0, BYTE_9, BYTE_f, BYTE_F = sbyte("0"), sbyte("9"), sbyte("f"), sbyte("F") 13 | local BYTE_x, BYTE_X, BYTE_i, BYTE_I = sbyte("x"), sbyte("X"), sbyte("i"), sbyte("I") 14 | local BYTE_l, BYTE_L, BYTE_u, BYTE_U = sbyte("l"), sbyte("L"), sbyte("u"), sbyte("U") 15 | local BYTE_e, BYTE_E, BYTE_p, BYTE_P = sbyte("e"), sbyte("E"), sbyte("p"), sbyte("P") 16 | local BYTE_a, BYTE_z, BYTE_A, BYTE_Z = sbyte("a"), sbyte("z"), sbyte("A"), sbyte("Z") 17 | local BYTE_DOT, BYTE_COLON = sbyte("."), sbyte(":") 18 | local BYTE_OBRACK, BYTE_CBRACK = sbyte("["), sbyte("]") 19 | local BYTE_OBRACE, BYTE_CBRACE = sbyte("{"), sbyte("}") 20 | local BYTE_QUOTE, BYTE_DQUOTE = sbyte("'"), sbyte('"') 21 | local BYTE_PLUS, BYTE_DASH, BYTE_LDASH = sbyte("+"), sbyte("-"), sbyte("_") 22 | local BYTE_SLASH, BYTE_BSLASH = sbyte("/"), sbyte("\\") 23 | local BYTE_EQ, BYTE_NE = sbyte("="), sbyte("~") 24 | local BYTE_LT, BYTE_GT = sbyte("<"), sbyte(">") 25 | local BYTE_LF, BYTE_CR = sbyte("\n"), sbyte("\r") 26 | local BYTE_SPACE, BYTE_FF, BYTE_TAB, BYTE_VTAB = sbyte(" "), sbyte("\f"), sbyte("\t"), sbyte("\v") 27 | 28 | local BYTE_ZH_MIN = 128 29 | local BYTE_ZH_MAX = 244 30 | 31 | local function to_hex(b) 32 | if BYTE_0 <= b and b <= BYTE_9 then 33 | return b-BYTE_0 34 | elseif BYTE_a <= b and b <= BYTE_f then 35 | return 10+b-BYTE_a 36 | elseif BYTE_A <= b and b <= BYTE_F then 37 | return 10+b-BYTE_A 38 | else 39 | return nil 40 | end 41 | end 42 | 43 | local function to_dec(b) 44 | if BYTE_0 <= b and b <= BYTE_9 then 45 | return b-BYTE_0 46 | else 47 | return nil 48 | end 49 | end 50 | 51 | local function to_utf(codepoint) 52 | if codepoint < 0x80 then -- ASCII? 
53 | return schar(codepoint) 54 | end 55 | 56 | local buf = {} 57 | local mfb = 0x3F 58 | 59 | repeat 60 | buf[#buf+1] = schar(codepoint % 0x40 + 0x80) 61 | codepoint = mfloor(codepoint / 0x40) 62 | mfb = mfloor(mfb / 2) 63 | until codepoint <= mfb 64 | 65 | buf[#buf+1] = schar(0xFE - mfb*2 + codepoint) 66 | return sreverse(tconcat(buf)) 67 | end 68 | 69 | 70 | 71 | local mask = { 72 | [BYTE_LDASH] = "alpha", 73 | [BYTE_LF] = "newline", 74 | [BYTE_CR] = "newline", 75 | 76 | [BYTE_SPACE] = "space", 77 | [BYTE_FF] = "space", 78 | [BYTE_TAB] = "space", 79 | [BYTE_VTAB] = "space", 80 | } 81 | for i=BYTE_a, BYTE_z do 82 | mask[i] = "alpha" 83 | end 84 | 85 | for i=BYTE_A, BYTE_Z do 86 | mask[i] = "alpha" 87 | end 88 | 89 | for i=BYTE_ZH_MIN, BYTE_ZH_MAX do 90 | mask[i] = "zh" 91 | end 92 | 93 | local function is_zh(b) 94 | return mask[b]=="zh" 95 | -- return b >= BYTE_ZH_MIN and b <= BYTE_ZH_MAX 96 | end 97 | 98 | 99 | local function is_alpha(b) 100 | return mask[b]=="alpha" 101 | -- return (BYTE_a <= b and b <= BYTE_z) or 102 | -- (BYTE_A <= b and b <= BYTE_Z) or b == BYTE_LDASH 103 | end 104 | 105 | local function is_newline(b) 106 | return mask[b]=="newline" 107 | -- return (b == BYTE_LF) or (b == BYTE_CR) 108 | end 109 | 110 | local function is_space(b) 111 | return mask[b]=="space" 112 | -- return (b == BYTE_SPACE) or (b == BYTE_FF) or 113 | -- (b == BYTE_TAB) or (b == BYTE_VTAB) 114 | end 115 | 116 | 117 | local keywords = { 118 | ["and"] = true, ["break"] = true, ["do"] = true, ["else"] = true, ["elseif"] = true, ["end"] = true, ["false"] = true, ["for"] = true, ["function"] = true, ["goto"] = true, ["if"] = true, ["in"] = true, 119 | ["local"] = true, ["nil"] = true, ["not"] = true, ["or"] = true, ["repeat"] = true, ["return"] = true, ["then"] = true, ["true"] = true, ["until"] = true, ["while"] = true, 120 | } 121 | 122 | local simple_escapes = { 123 | [sbyte("a")] = sbyte("\a"), 124 | [sbyte("b")] = sbyte("\b"), 125 | [sbyte("f")] = sbyte("\f"), 126 | 
[sbyte("n")] = sbyte("\n"), 127 | [sbyte("r")] = sbyte("\r"), 128 | [sbyte("t")] = sbyte("\t"), 129 | [sbyte("v")] = sbyte("\v"), 130 | [BYTE_BSLASH] = BYTE_BSLASH, 131 | [BYTE_QUOTE] = BYTE_QUOTE, 132 | [BYTE_DQUOTE] = BYTE_DQUOTE 133 | } 134 | 135 | local function next_byte(state, inc) 136 | inc = inc or 1 137 | state.offset = state.offset+inc 138 | return sbyte(state.src, state.offset) 139 | end 140 | 141 | -- Skipping helpers. 142 | -- Take the current character, skip something, return next character. 143 | 144 | local function skip_newline(state, newline) 145 | local b = next_byte(state) 146 | 147 | if b ~= newline and is_newline(b) then 148 | b = next_byte(state) 149 | end 150 | 151 | state.line = state.line+1 152 | state.line_offset = state.offset 153 | return b 154 | end 155 | 156 | local function skip_till_newline(state, b) 157 | while not is_newline(b) and b ~= nil do 158 | b = next_byte(state) 159 | end 160 | 161 | return b 162 | end 163 | 164 | local function skip_space(state, b) 165 | while is_space(b) or is_newline(b) do 166 | if is_newline(b) then 167 | b = skip_newline(state, b) 168 | else 169 | b = next_byte(state) 170 | end 171 | end 172 | 173 | return b 174 | end 175 | 176 | -- Skips "[=*" or "]=*". Returns next character and number of "="s. 177 | local function skip_long_bracket(state) 178 | local start = state.offset 179 | local b = next_byte(state) 180 | 181 | while b == BYTE_EQ do 182 | b = next_byte(state) 183 | end 184 | 185 | return b, state.offset-start-1 186 | end 187 | 188 | -- Token handlers. 189 | 190 | -- Called after the opening "[=*" has been skipped. 191 | -- Takes number of "=" in the opening bracket and token type(comment or string). 
192 | local function lex_long_string(state, opening_long_bracket, token) 193 | local b = next_byte(state) 194 | 195 | if is_newline(b) then 196 | b = skip_newline(state, b) 197 | end 198 | 199 | local lines = {} 200 | local line_start = state.offset 201 | 202 | while true do 203 | if is_newline(b) then 204 | -- Add the finished line. 205 | lines[#lines+1] = ssub(state.src, line_start, state.offset-1) 206 | 207 | b = skip_newline(state, b) 208 | line_start = state.offset 209 | elseif b == BYTE_CBRACK then 210 | local long_bracket 211 | b, long_bracket = skip_long_bracket(state) 212 | 213 | if b == BYTE_CBRACK and long_bracket == opening_long_bracket then 214 | break 215 | end 216 | elseif b == nil then 217 | return nil, token == "string" and "unfinished long string" or "unfinished long comment" 218 | else 219 | b = next_byte(state) 220 | end 221 | end 222 | 223 | -- Add last line. 224 | lines[#lines+1] = ssub(state.src, line_start, state.offset-opening_long_bracket-2) 225 | next_byte(state) 226 | return token, tconcat(lines, "\n") 227 | end 228 | 229 | local function lex_short_string(state, quote) 230 | local b = next_byte(state) 231 | local chunks -- Buffer is only required when there are escape sequences. 232 | local chunk_start = state.offset 233 | 234 | while b ~= quote do 235 | if b == BYTE_BSLASH then 236 | -- Escape sequence. 237 | 238 | if not chunks then 239 | -- This is the first escape sequence, init buffer. 240 | chunks = {} 241 | end 242 | 243 | -- Put previous chunk into buffer. 244 | if chunk_start ~= state.offset then 245 | chunks[#chunks+1] = ssub(state.src, chunk_start, state.offset-1) 246 | end 247 | 248 | b = next_byte(state) 249 | 250 | -- The final string escape sequence evaluates to. 251 | local s 252 | 253 | local escape_byte = simple_escapes[b] 254 | 255 | if escape_byte then -- Is it a simple escape sequence? 
256 | b = next_byte(state) 257 | s = schar(escape_byte) 258 | elseif is_newline(b) then 259 | b = skip_newline(state, b) 260 | s = "\n" 261 | elseif b == BYTE_x then 262 | -- Hexadecimal escape. 263 | b = next_byte(state) -- Skip "x". 264 | -- Exactly two hexadecimal digits. 265 | local c1, c2 266 | 267 | if b then 268 | c1 = to_hex(b) 269 | end 270 | 271 | if not c1 then 272 | return nil, "invalid hexadecimal escape sequence", -2 273 | end 274 | 275 | b = next_byte(state) 276 | 277 | if b then 278 | c2 = to_hex(b) 279 | end 280 | 281 | if not c2 then 282 | return nil, "invalid hexadecimal escape sequence", -3 283 | end 284 | 285 | b = next_byte(state) 286 | s = schar(c1*16 + c2) 287 | elseif b == BYTE_u then 288 | b = next_byte(state) -- Skip "u". 289 | 290 | if b ~= BYTE_OBRACE then 291 | return nil, "invalid UTF-8 escape sequence", -2 292 | end 293 | 294 | b = next_byte(state) -- Skip "{". 295 | 296 | local codepoint -- There should be at least one digit. 297 | 298 | if b then 299 | codepoint = to_hex(b) 300 | end 301 | 302 | if not codepoint then 303 | return nil, "invalid UTF-8 escape sequence", -3 304 | end 305 | 306 | local hexdigits = 0 307 | 308 | while true do 309 | b = next_byte(state) 310 | local hex 311 | 312 | if b then 313 | hex = to_hex(b) 314 | end 315 | 316 | if hex then 317 | hexdigits = hexdigits + 1 318 | codepoint = codepoint*16 + hex 319 | 320 | if codepoint > 0x10FFFF then 321 | -- UTF-8 value too large. 322 | return nil, "invalid UTF-8 escape sequence", -hexdigits-3 323 | end 324 | else 325 | break 326 | end 327 | end 328 | 329 | if b ~= BYTE_CBRACE then 330 | return nil, "invalid UTF-8 escape sequence", -hexdigits-4 331 | end 332 | 333 | b = next_byte(state) -- Skip "}". 334 | s = to_utf(codepoint) 335 | elseif b == BYTE_z then 336 | -- Zap following span of spaces. 337 | b = skip_space(state, next_byte(state)) 338 | else 339 | -- Must be a decimal escape. 
340 | local cb 341 | 342 | if b then 343 | cb = to_dec(b) 344 | end 345 | 346 | if not cb then 347 | return nil, "invalid escape sequence", -1 348 | end 349 | 350 | -- Up to three decimal digits. 351 | b = next_byte(state) 352 | 353 | if b then 354 | local c2 = to_dec(b) 355 | 356 | if c2 then 357 | cb = 10*cb + c2 358 | b = next_byte(state) 359 | 360 | if b then 361 | local c3 = to_dec(b) 362 | 363 | if c3 then 364 | cb = 10*cb + c3 365 | 366 | if cb > 255 then 367 | return nil, "invalid decimal escape sequence", -3 368 | end 369 | 370 | b = next_byte(state) 371 | end 372 | end 373 | end 374 | end 375 | 376 | s = schar(cb) 377 | end 378 | 379 | if s then 380 | chunks[#chunks+1] = s 381 | end 382 | 383 | -- Next chunk starts after escape sequence. 384 | chunk_start = state.offset 385 | elseif b == nil or is_newline(b) then 386 | return nil, "unfinished string" 387 | else 388 | b = next_byte(state) 389 | end 390 | end 391 | 392 | -- Offset now points at the closing quote. 393 | local string_value 394 | 395 | if chunks then 396 | -- Put last chunk into buffer. 397 | if chunk_start ~= state.offset then 398 | chunks[#chunks+1] = ssub(state.src, chunk_start, state.offset-1) 399 | end 400 | 401 | string_value = tconcat(chunks) 402 | else 403 | -- There were no escape sequences. 404 | string_value = ssub(state.src, chunk_start, state.offset-1) 405 | end 406 | 407 | next_byte(state) -- Skip the closing quote. 408 | return "string", string_value 409 | end 410 | 411 | -- Payload for a number is simply a substring. 412 | -- Luacheck is supposed to be forward-compatible with Lua 5.3 and LuaJIT syntax, so 413 | -- parsing it into actual number may be problematic. 414 | -- It is not needed currently anyway as Luacheck does not do static evaluation yet. 
415 | local function lex_number(state, b) 416 | local start = state.offset 417 | 418 | local exp_lower, exp_upper = BYTE_e, BYTE_E 419 | local is_digit = to_dec 420 | local has_digits = false 421 | local is_float = false 422 | 423 | if b == BYTE_0 then 424 | b = next_byte(state) 425 | 426 | if b == BYTE_x or b == BYTE_X then 427 | exp_lower, exp_upper = BYTE_p, BYTE_P 428 | is_digit = to_hex 429 | b = next_byte(state) 430 | else 431 | has_digits = true 432 | end 433 | end 434 | 435 | while b ~= nil and is_digit(b) do 436 | b = next_byte(state) 437 | has_digits = true 438 | end 439 | 440 | if b == BYTE_DOT then 441 | -- Fractional part. 442 | is_float = true 443 | b = next_byte(state) -- Skip dot. 444 | 445 | while b ~= nil and is_digit(b) do 446 | b = next_byte(state) 447 | has_digits = true 448 | end 449 | end 450 | 451 | if b == exp_lower or b == exp_upper then 452 | -- Exponent part. 453 | is_float = true 454 | b = next_byte(state) 455 | 456 | -- Skip optional sign. 457 | if b == BYTE_PLUS or b == BYTE_DASH then 458 | b = next_byte(state) 459 | end 460 | 461 | -- Exponent consists of one or more decimal digits. 462 | if b == nil or not to_dec(b) then 463 | return nil, "malformed number" 464 | end 465 | 466 | repeat 467 | b = next_byte(state) 468 | until b == nil or not to_dec(b) 469 | end 470 | 471 | if not has_digits then 472 | return nil, "malformed number" 473 | end 474 | 475 | -- Is it cdata literal? 476 | if b == BYTE_i or b == BYTE_I then 477 | -- It is complex literal. Skip "i" or "I". 478 | next_byte(state) 479 | else 480 | -- uint64_t and int64_t literals can not be fractional. 481 | if not is_float then 482 | if b == BYTE_u or b == BYTE_U then 483 | -- It may be uint64_t literal. 484 | local b1, b2 = sbyte(state.src, state.offset+1, state.offset+2) 485 | 486 | if (b1 == BYTE_l or b1 == BYTE_L) and (b2 == BYTE_l or b2 == BYTE_L) then 487 | -- It is uint64_t literal. 
488 | next_byte(state, 3) 489 | end 490 | elseif b == BYTE_l or b == BYTE_L then 491 | -- It may be uint64_t or int64_t literal. 492 | local b1, b2 = sbyte(state.src, state.offset+1, state.offset+2) 493 | 494 | if b1 == BYTE_l or b1 == BYTE_L then 495 | if b2 == BYTE_u or b2 == BYTE_U then 496 | -- It is uint64_t literal. 497 | next_byte(state, 3) 498 | else 499 | -- It is int64_t literal. 500 | next_byte(state, 2) 501 | end 502 | end 503 | end 504 | end 505 | end 506 | 507 | return "number", ssub(state.src, start, state.offset-1) 508 | end 509 | 510 | local function lex_ident(state) 511 | local start = state.offset 512 | local b = next_byte(state) 513 | 514 | while (b ~= nil) and (is_alpha(b) or to_dec(b) or is_zh(b)) do 515 | b = next_byte(state) 516 | end 517 | 518 | local ident = ssub(state.src, start, state.offset-1) 519 | 520 | if keywords[ident] then 521 | return ident 522 | else 523 | return "name", ident 524 | end 525 | end 526 | 527 | local function lex_dash(state) 528 | local b = next_byte(state) 529 | 530 | -- Is it "-" or comment? 531 | if b ~= BYTE_DASH then 532 | return "-" 533 | else 534 | -- It is a comment. 535 | b = next_byte(state) 536 | local start = state.offset 537 | 538 | -- Is it a long comment? 539 | if b == BYTE_OBRACK then 540 | local long_bracket 541 | b, long_bracket = skip_long_bracket(state) 542 | 543 | if b == BYTE_OBRACK then 544 | return lex_long_string(state, long_bracket, "comment") 545 | end 546 | end 547 | 548 | -- Short comment. 549 | b = skip_till_newline(state, b) 550 | local comment_value = ssub(state.src, start, state.offset-1) 551 | skip_newline(state, b) 552 | return "comment", comment_value 553 | end 554 | end 555 | 556 | local function lex_bracket(state) 557 | -- Is it "[" or long string? 
558 | local b, long_bracket = skip_long_bracket(state) 559 | 560 | if b == BYTE_OBRACK then 561 | return lex_long_string(state, long_bracket, "string") 562 | elseif long_bracket == 0 then 563 | return "[" 564 | else 565 | return nil, "invalid long string delimiter" 566 | end 567 | end 568 | 569 | local function lex_eq(state) 570 | local b = next_byte(state) 571 | 572 | if b == BYTE_EQ then 573 | next_byte(state) 574 | return "==" 575 | else 576 | return "=" 577 | end 578 | end 579 | 580 | local function lex_lt(state) 581 | local b = next_byte(state) 582 | 583 | if b == BYTE_EQ then 584 | next_byte(state) 585 | return "<=" 586 | elseif b == BYTE_LT then 587 | next_byte(state) 588 | return "<<" 589 | else 590 | return "<" 591 | end 592 | end 593 | 594 | local function lex_gt(state) 595 | local b = next_byte(state) 596 | 597 | if b == BYTE_EQ then 598 | next_byte(state) 599 | return ">=" 600 | elseif b == BYTE_GT then 601 | next_byte(state) 602 | return ">>" 603 | else 604 | return ">" 605 | end 606 | end 607 | 608 | local function lex_div(state) 609 | local b = next_byte(state) 610 | 611 | if b == BYTE_SLASH then 612 | next_byte(state) 613 | return "//" 614 | else 615 | return "/" 616 | end 617 | end 618 | 619 | local function lex_ne(state) 620 | local b = next_byte(state) 621 | 622 | if b == BYTE_EQ then 623 | next_byte(state) 624 | return "~=" 625 | else 626 | return "~" 627 | end 628 | end 629 | 630 | local function lex_colon(state) 631 | local b = next_byte(state) 632 | 633 | if b == BYTE_COLON then 634 | next_byte(state) 635 | return "::" 636 | else 637 | return ":" 638 | end 639 | end 640 | 641 | local function lex_dot(state) 642 | local b = next_byte(state) 643 | 644 | if b == BYTE_DOT then 645 | b = next_byte(state) 646 | 647 | if b == BYTE_DOT then 648 | next_byte(state) 649 | return "...", "..." 650 | else 651 | return ".." 652 | end 653 | elseif b and to_dec(b) then 654 | -- Backtrack to dot. 
655 | return lex_number(state, next_byte(state, -1)) 656 | else 657 | return "." 658 | end 659 | end 660 | 661 | local function lex_any(state, b) 662 | next_byte(state) 663 | return schar(b) 664 | end 665 | 666 | -- Maps first bytes of tokens to functions that handle them. 667 | -- Each handler takes the first byte as an argument. 668 | -- Each handler stops at the character after the token and returns the token and, 669 | -- optionally, a value associated with the token. 670 | -- On error handler returns nil, error message and, optionally, start of reported location as negative offset. 671 | local byte_handlers = { 672 | [BYTE_DOT] = lex_dot, 673 | [BYTE_COLON] = lex_colon, 674 | [BYTE_OBRACK] = lex_bracket, 675 | [BYTE_QUOTE] = lex_short_string, 676 | [BYTE_DQUOTE] = lex_short_string, 677 | [BYTE_DASH] = lex_dash, 678 | [BYTE_SLASH] = lex_div, 679 | [BYTE_EQ] = lex_eq, 680 | [BYTE_NE] = lex_ne, 681 | [BYTE_LT] = lex_lt, 682 | [BYTE_GT] = lex_gt, 683 | [BYTE_LDASH] = lex_ident 684 | } 685 | 686 | for b=BYTE_0, BYTE_9 do 687 | byte_handlers[b] = lex_number 688 | end 689 | 690 | for b=BYTE_a, BYTE_z do 691 | byte_handlers[b] = lex_ident 692 | end 693 | 694 | for b=BYTE_A, BYTE_Z do 695 | byte_handlers[b] = lex_ident 696 | end 697 | 698 | for b=BYTE_ZH_MIN, BYTE_ZH_MAX do 699 | byte_handlers[b] = lex_ident 700 | end 701 | 702 | local function decimal_escaper(char) 703 | return "\\" .. tostring(sbyte(char)) 704 | end 705 | 706 | -- Returns quoted printable representation of s. 707 | function lexer.quote(s) 708 | return "'" .. s:gsub("[^\32-\126]", decimal_escaper) .. "'" 709 | end 710 | 711 | -- Creates and returns lexer state for source. 712 | function lexer.new_state(src) 713 | local state = { 714 | src = src, 715 | line = 1, 716 | line_offset = 1, 717 | offset = 1 718 | } 719 | 720 | if ssub(src, 1, 2) == "#!" then 721 | -- Skip shebang. 
722 | skip_newline(state, skip_till_newline(state, next_byte(state, 2))) 723 | end 724 | 725 | return state 726 | end 727 | 728 | function lexer.syntax_error(location, end_column, msg) 729 | error(setmetatable({ 730 | line = location.line, 731 | column = location.column, 732 | end_column = end_column, 733 | msg = msg}, { 734 | __tostring = function (t) 735 | local s = string.format("%s @line:%d column:%d end_column:%d", 736 | t.msg, t.line, t.column, t.end_column) 737 | return s 738 | end} 739 | ) 740 | ) 741 | end 742 | 743 | -- Looks for next token starting from state.line, state.line_offset, state.offset. 744 | -- Returns next token, its value and its location (line, column, offset). 745 | -- Sets state.line, state.line_offset, state.offset to token end location + 1. 746 | -- On error returns nil, error message, error location (line, column, offset), error end column. 747 | function lexer.next_token(state) 748 | local b = skip_space(state, sbyte(state.src, state.offset)) 749 | 750 | -- Save location of token start. 751 | local token_line = state.line 752 | local token_column = state.offset - state.line_offset + 1 753 | local token_offset = state.offset 754 | 755 | local token, token_value, err_offset, err_end_column 756 | 757 | if b == nil then 758 | token = "eof" 759 | else 760 | token, token_value, err_offset = (byte_handlers[b] or lex_any)(state, b) 761 | end 762 | 763 | if err_offset then 764 | local token_body = ssub(state.src, state.offset + err_offset, state.offset) 765 | token_value = token_value .. " " .. 
lexer.quote(token_body) 766 | token_line = state.line 767 | token_column = state.offset - state.line_offset + 1 + err_offset 768 | token_offset = state.offset + err_offset 769 | err_end_column = token_column + #token_body - 1 770 | end 771 | 772 | return token, token_value, token_line, token_column, token_offset, err_end_column or token_column 773 | end 774 | 775 | return lexer 776 | -------------------------------------------------------------------------------- /parser.lua: -------------------------------------------------------------------------------- 1 | local lexer = require "lexer" 2 | 3 | local function new_state(src) 4 | return { 5 | lexer = lexer.new_state(src), 6 | code_lines = {}, -- Set of line numbers containing code. 7 | comments = {}, -- Array of {comment = string, location = location}. 8 | hanging_semicolons = {} -- Array of locations of semicolons not following a statement. 9 | } 10 | end 11 | 12 | local function location(state) 13 | return setmetatable({ 14 | line = state.line, 15 | column = state.column, 16 | offset = state.offset 17 | }, {__tostring = function (t) 18 | local s = string.format("@line:%d column:%d offset:%d", t.line, t.column, t.offset) 19 | return s 20 | end}) 21 | end 22 | 23 | local function token_body_or_line(state) 24 | return state.lexer.src:sub(state.offset, state.lexer.offset - 1):match("^[^\r\n]*") 25 | end 26 | 27 | local function skip_token(state) 28 | while true do 29 | local err_end_column 30 | state.token, state.token_value, state.line, state.column, state.offset, err_end_column = lexer.next_token(state.lexer) 31 | 32 | if not state.token then 33 | lexer.syntax_error(state, err_end_column, state.token_value) 34 | elseif state.token == "comment" then 35 | state.comments[#state.comments+1] = { 36 | contents = state.token_value, 37 | location = location(state), 38 | end_column = state.column + #token_body_or_line(state) - 1 39 | } 40 | else 41 | state.code_lines[state.line] = true 42 | break 43 | end 44 | end 45 | end 
-- ===== parser.lua (continued): AST construction, error helpers, and =====
-- ===== expression/call/suffix parsing                               =====

-- Initializes `node` as an AST node: attaches its source location and tag.
local function init_ast_node(node, loc, tag)
    node.location = loc
    node.tag = tag
    return node
end

-- Creates a fresh AST node tagged `tag` at the lexer's current position.
local function new_ast_node(state, tag)
    return init_ast_node({}, location(state), tag)
end

-- Display names for tokens used in error messages.
-- NOTE(review): the dump showed `eof = ""`; restored to "<eof>" — the angle
-- brackets were most likely stripped by HTML escaping. Verify upstream.
local token_names = {
    eof = "<eof>",
    name = "identifier",
    ["do"] = "'do'",
    ["end"] = "'end'",
    ["then"] = "'then'",
    ["in"] = "'in'",
    ["until"] = "'until'",
    ["::"] = "'::'"
}

local function token_name(token)
    return token_names[token] or lexer.quote(token)
end

-- Raises a syntax error at the current token, appending "near <token>".
local function parse_error(state, msg)
    local token_repr, end_column

    if state.token == "eof" then
        -- NOTE(review): restored "<eof>" (see token_names above).
        token_repr = "<eof>"
        end_column = state.column
    else
        token_repr = token_body_or_line(state)
        end_column = state.column + #token_repr - 1
        token_repr = lexer.quote(token_repr)
    end

    lexer.syntax_error(state, end_column, msg .. " near " .. token_repr)
end

-- Errors out unless the current token is `token`.
local function check_token(state, token)
    if state.token ~= token then
        parse_error(state, "expected " .. token_name(token))
    end
end

-- Errors out unless the current token is `token`, then consumes it.
local function check_and_skip_token(state, token)
    check_token(state, token)
    skip_token(state)
end

-- Consumes the current token and returns true if it is `token`;
-- otherwise leaves the lexer untouched and returns nil.
local function test_and_skip_token(state, token)
    if state.token == token then
        skip_token(state)
        return true
    end
end

-- Consumes `closing_token` or raises an error mentioning the opening
-- token's line when the construct spans multiple lines.
local function check_closing_token(state, opening_token, closing_token, opening_line)
    if state.token ~= closing_token then
        local err = "expected " .. token_name(closing_token)

        if opening_line ~= state.line then
            err = err .. " (to close " .. token_name(opening_token) ..
                " on line " .. tostring(opening_line) .. ")"
        end

        parse_error(state, err)
    end

    skip_token(state)
end

-- Errors out unless the current token is an identifier; returns its text.
local function check_name(state)
    check_token(state, "name")
    return state.token_value
end

-- If needed, wraps last expression in expressions in "Paren" node.
-- FIX(review): the original condition lacked parentheses —
-- `last and last.tag == "Call" or last.tag == …` indexed `last.tag` even
-- when `last` was nil, crashing on an empty expression list.
local function opt_add_parens(expressions, is_inside_parentheses)
    if is_inside_parentheses then
        local last = expressions[#expressions]

        if last and (last.tag == "Call" or last.tag == "Invoke" or last.tag == "Dots") then
            expressions[#expressions] = init_ast_node({last}, last.location, "Paren")
        end
    end
end

-- Forward declarations (mutually recursive with the parsers below).
local parse_block, parse_expression

-- Parses a comma-separated expression list; the last expression may be
-- wrapped in "Paren" (see opt_add_parens).
local function parse_expression_list(state)
    local list = {}
    local is_inside_parentheses

    repeat
        list[#list+1], is_inside_parentheses = parse_expression(state)
    until not test_and_skip_token(state, ",")

    opt_add_parens(list, is_inside_parentheses)
    return list
end

-- Parses an identifier into a node tagged `tag` (default "Id").
local function parse_id(state, tag)
    local ast_node = new_ast_node(state, tag or "Id")
    ast_node[1] = check_name(state)
    skip_token(state) -- Skip name.
    return ast_node
end

-- Returns a parser for a literal token, producing a node tagged `tag`.
local function atom(tag)
    return function(state)
        local ast_node = new_ast_node(state, tag)
        ast_node[1] = state.token_value
        skip_token(state)
        return ast_node
    end
end

-- Dispatch table: token -> parser for simple (non-suffixed) expressions.
local simple_expressions = {}

simple_expressions.number = atom("Number")
simple_expressions.string = atom("String")
simple_expressions["nil"] = atom("Nil")
simple_expressions["true"] = atom("True")
simple_expressions["false"] = atom("False")
simple_expressions["..."] = atom("Dots")

-- Table constructor: `{ [k] = v, name = v, v, ... }`.
simple_expressions["{"] = function(state)
    local ast_node = new_ast_node(state, "Table")
    local start_line = state.line
    skip_token(state)
    local is_inside_parentheses = false

    repeat
        if state.token == "}" then
            break
        else
            local lhs, rhs
            local item_location = location(state)
            local first_key_token

            if state.token == "name" then
                local name = state.token_value
                skip_token(state) -- Skip name.

                if test_and_skip_token(state, "=") then
                    -- `name` = `expr`.
                    first_key_token = name
                    lhs = init_ast_node({name}, item_location, "String")
                    rhs, is_inside_parentheses = parse_expression(state)
                else
                    -- `name` is beginning of an expression in array part.
                    -- Backtrack lexer to before name.
                    state.lexer.line = item_location.line
                    state.lexer.line_offset = item_location.offset - item_location.column + 1
                    state.lexer.offset = item_location.offset
                    skip_token(state) -- Load name again.
                    rhs, is_inside_parentheses = parse_expression(state, nil, true)
                end
            elseif state.token == "[" then
                -- [ `expr` ] = `expr`.
                item_location = location(state)
                first_key_token = "["
                skip_token(state)
                lhs = parse_expression(state)
                check_closing_token(state, "[", "]", item_location.line)
                check_and_skip_token(state, "=")
                rhs = parse_expression(state)
            else
                -- Expression in array part.
                rhs, is_inside_parentheses = parse_expression(state, nil, true)
            end

            if lhs then
                -- Pair.
                ast_node[#ast_node+1] = init_ast_node(
                    {lhs, rhs, first_token = first_key_token}, item_location, "Pair")
            else
                -- Array part item.
                ast_node[#ast_node+1] = rhs
            end
        end
    until not (test_and_skip_token(state, ",") or test_and_skip_token(state, ";"))

    check_closing_token(state, "{", "}", start_line)
    opt_add_parens(ast_node, is_inside_parentheses)
    return ast_node
end

-- Parses argument list and the statements.
local function parse_function(state, func_location)
    local paren_line = state.line
    check_and_skip_token(state, "(")
    local args = {}

    if state.token ~= ")" then -- Are there arguments?
        repeat
            if state.token == "name" then
                args[#args+1] = parse_id(state)
            elseif state.token == "..." then
                args[#args+1] = simple_expressions["..."](state)
                break -- "..." must be the last argument.
            else
                parse_error(state, "expected argument")
            end
        until not test_and_skip_token(state, ",")
    end

    check_closing_token(state, "(", ")", paren_line)
    local body = parse_block(state)
    local end_location = location(state)
    check_closing_token(state, "function", "end", func_location.line)
    return init_ast_node({args, body, end_location = end_location}, func_location, "Function")
end

simple_expressions["function"] = function(state)
    local function_location = location(state)
    skip_token(state) -- Skip "function".
    return parse_function(state, function_location)
end

-- Dispatch table: token -> parser for call-argument forms.
local calls = {}

calls["("] = function(state)
    local paren_line = state.line
    skip_token(state) -- Skip "(".
    local args = (state.token == ")") and {} or parse_expression_list(state)
    check_closing_token(state, "(", ")", paren_line)
    return args
end

-- `f{...}` and `f"..."`: single table/string literal as the argument.
calls["{"] = function(state)
    return {simple_expressions[state.token](state)}
end

calls.string = calls["{"]

-- Dispatch table: token -> parser for expression suffixes (indexing, calls).
local suffixes = {}

suffixes["."] = function(state, lhs)
    skip_token(state) -- Skip ".".
    local rhs = parse_id(state, "String")
    return init_ast_node({lhs, rhs}, lhs.location, "Index")
end

suffixes["["] = function(state, lhs)
    local bracket_line = state.line
    skip_token(state) -- Skip "[".
    local rhs = parse_expression(state)
    check_closing_token(state, "[", "]", bracket_line)
    return init_ast_node({lhs, rhs}, lhs.location, "Index")
end

suffixes[":"] = function(state, lhs)
    skip_token(state) -- Skip ":".
    local method_name = parse_id(state, "String")
    local args = (calls[state.token] or parse_error)(state, "expected method arguments")
    table.insert(args, 1, lhs)
    table.insert(args, 2, method_name)
    return init_ast_node(args, lhs.location, "Invoke")
end

suffixes["("] = function(state, lhs)
    local args = calls[state.token](state)
    table.insert(args, 1, lhs)
    return init_ast_node(args, lhs.location, "Call")
end

suffixes["{"] = suffixes["("]
suffixes.string = suffixes["("]

-- Additionally returns whether the expression is inside parens and the first non-paren token.
-- Parses a primary expression plus any suffix chain ( `.k`, `[k]`, calls ).
-- Returns the node, whether it sits inside parentheses, and its first
-- non-paren token.
local function parse_simple_expression(state, kind, no_literals)
    local expression, first_token
    local in_parens = false

    if state.token == "(" then
        in_parens = true
        local paren_line = state.line
        skip_token(state)
        local _
        expression, _, first_token = parse_expression(state)
        check_closing_token(state, "(", ")", paren_line)
    elseif state.token == "name" then
        expression = parse_id(state)
        first_token = expression[1]
    else
        local literal_handler = simple_expressions[state.token]

        if not literal_handler or no_literals then
            parse_error(state, "expected " .. (kind or "expression"))
        end

        first_token = token_body_or_line(state)
        -- Literals cannot take suffixes, so return immediately.
        return literal_handler(state), false, first_token
    end

    -- Consume suffixes (indexing / calls) as long as one applies.
    while true do
        local suffix_handler = suffixes[state.token]

        if suffix_handler then
            in_parens = false -- A suffixed expression is no longer "(expr)".
            expression = suffix_handler(state, expression)
        else
            return expression, in_parens, first_token
        end
    end
end

-- Unary operator token -> Metalua operator name.
local unary_operators = {
    ["not"] = "not",
    ["-"] = "unm", -- Not mentioned in Metalua documentation.
    ["~"] = "bnot",
    ["#"] = "len"
}

local unary_priority = 12

-- Binary operator token -> Metalua operator name.
local binary_operators = {
    ["+"] = "add", ["-"] = "sub",
    ["*"] = "mul", ["%"] = "mod",
    ["^"] = "pow",
    ["/"] = "div", ["//"] = "idiv",
    ["&"] = "band", ["|"] = "bor", ["~"] = "bxor",
    ["<<"] = "shl", [">>"] = "shr",
    [".."] = "concat",
    ["~="] = "ne", ["=="] = "eq",
    ["<"] = "lt", ["<="] = "le",
    [">"] = "gt", [">="] = "ge",
    ["and"] = "and", ["or"] = "or"
}

-- Left/right binding priorities per operator; right < left for the
-- right-associative operators ("pow", "concat").
local left_priorities = {
    add = 10, sub = 10,
    mul = 11, mod = 11,
    pow = 14,
    div = 11, idiv = 11,
    band = 6, bor = 4, bxor = 5,
    shl = 7, shr = 7,
    concat = 9,
    ne = 3, eq = 3,
    lt = 3, le = 3,
    gt = 3, ge = 3,
    ["and"] = 2, ["or"] = 1
}

local right_priorities = {
    add = 10, sub = 10,
    mul = 11, mod = 11,
    pow = 13,
    div = 11, idiv = 11,
    band = 6, bor = 4, bxor = 5,
    shl = 7, shr = 7,
    concat = 8,
    ne = 3, eq = 3,
    lt = 3, le = 3,
    gt = 3, ge = 3,
    ["and"] = 2, ["or"] = 1
}

-- Precedence-climbing expression parser.
-- Additionally returns whether subexpression is inside parentheses, and its first non-paren token.
local function parse_subexpression(state, limit, kind)
    local expression
    local first_token
    local in_parens = false
    local unary_operator = unary_operators[state.token]

    if unary_operator then
        first_token = state.token
        local unary_location = location(state)
        skip_token(state) -- Skip operator.
        local unary_operand = parse_subexpression(state, unary_priority)
        expression = init_ast_node({unary_operator, unary_operand}, unary_location, "Op")
    else
        expression, in_parens, first_token = parse_simple_expression(state, kind)
    end

    -- Expand while operators have priorities higher than `limit`.
    while true do
        local binary_operator = binary_operators[state.token]

        if not binary_operator or left_priorities[binary_operator] <= limit then
            break
        end

        in_parens = false
        skip_token(state) -- Skip operator.
        -- Read subexpression with higher priority.
        local subexpression = parse_subexpression(state, right_priorities[binary_operator])
        expression = init_ast_node({binary_operator, expression, subexpression}, expression.location, "Op")
    end

    return expression, in_parens, first_token
end

-- Additionally returns whether expression is inside parentheses and the first non-paren token.
-- (Assigns the local forward-declared earlier in the file.)
function parse_expression(state, kind, save_first_token)
    local expression, in_parens, first_token = parse_subexpression(state, 0, kind)
    expression.first_token = save_first_token and first_token
    return expression, in_parens, first_token
end

-- Dispatch table: leading keyword -> statement parser.
local statements = {}

statements["if"] = function(state, loc)
    local start_line, start_token
    local next_line, next_token = loc.line, "if"
    local ast_node = init_ast_node({}, loc, "If")

    repeat
        ast_node[#ast_node+1] = parse_expression(state, "condition", true)
        local branch_location = location(state)
        check_and_skip_token(state, "then")
        ast_node[#ast_node+1] = parse_block(state, branch_location)
        -- Track which "if"/"elseif" keyword opened the still-unclosed branch.
        start_line, start_token = next_line, next_token
        next_line, next_token = state.line, state.token
    until not test_and_skip_token(state, "elseif")

    if state.token == "else" then
        start_line, start_token = next_line, next_token
        local branch_location = location(state)
        skip_token(state)
        ast_node[#ast_node+1] = parse_block(state, branch_location)
    end

    check_closing_token(state, start_token, "end", start_line)
    return ast_node
end

statements["while"] = function(state, loc)
    local condition = parse_expression(state, "condition")
    check_and_skip_token(state, "do")
    local block = parse_block(state)
    check_closing_token(state, "while", "end", loc.line)
    return init_ast_node({condition, block}, loc, "While")
end

statements["do"] = function(state, loc)
    local ast_node = init_ast_node(parse_block(state), loc, "Do")
    check_closing_token(state, "do", "end", loc.line)
    return ast_node
end

statements["for"] = function(state, loc)
    local ast_node = init_ast_node({}, loc) -- Will set ast_node.tag later.
    local first_var = parse_id(state)

    if state.token == "=" then
        -- Numeric "for" loop.
        ast_node.tag = "Fornum"
        skip_token(state)
        ast_node[1] = first_var
        ast_node[2] = parse_expression(state)
        check_and_skip_token(state, ",")
        ast_node[3] = parse_expression(state)

        if test_and_skip_token(state, ",") then
            ast_node[4] = parse_expression(state) -- Optional step.
        end

        check_and_skip_token(state, "do")
        ast_node[#ast_node+1] = parse_block(state)
    elseif state.token == "," or state.token == "in" then
        -- Generic "for" loop.
        ast_node.tag = "Forin"

        local iter_vars = {first_var}
        while test_and_skip_token(state, ",") do
            iter_vars[#iter_vars+1] = parse_id(state)
        end

        ast_node[1] = iter_vars
        check_and_skip_token(state, "in")
        ast_node[2] = parse_expression_list(state)
        check_and_skip_token(state, "do")
        ast_node[3] = parse_block(state)
    else
        parse_error(state, "expected '=', ',' or 'in'")
    end

    check_closing_token(state, "for", "end", loc.line)
    return ast_node
end

statements["repeat"] = function(state, loc)
    local block = parse_block(state)
    check_closing_token(state, "repeat", "until", loc.line)
    local condition = parse_expression(state, "condition", true)
    return init_ast_node({block, condition}, loc, "Repeat")
end

statements["function"] = function(state, loc)
    local lhs_location = location(state)
    local lhs = parse_id(state)
    local self_location

    -- Walk `a.b.c` / final `:method`; remember the `:` location, if any.
    while (not self_location) and (state.token == "." or state.token == ":") do
        self_location = state.token == ":" and location(state)
        skip_token(state) -- Skip "." or ":".
        lhs = init_ast_node({lhs, parse_id(state, "String")}, lhs_location, "Index")
    end

    local function_node = parse_function(state, loc)

    if self_location then
        -- Insert implicit "self" argument.
        local self_arg = init_ast_node({"self", implicit = true}, self_location, "Id")
        table.insert(function_node[1], 1, self_arg)
    end

    return init_ast_node({{lhs}, {function_node}}, loc, "Set")
end

statements["local"] = function(state, loc)
    if state.token == "function" then
        -- Localrec
        local function_location = location(state)
        skip_token(state) -- Skip "function".
        local var = parse_id(state)
        local function_node = parse_function(state, function_location)
        -- Metalua would return {{var}, {function}} for some reason.
        return init_ast_node({var, function_node}, loc, "Localrec")
    end

    local lhs = {}
    local rhs

    repeat
        lhs[#lhs+1] = parse_id(state)
    until not test_and_skip_token(state, ",")

    local equals_location = location(state)

    if test_and_skip_token(state, "=") then
        rhs = parse_expression_list(state)
    end

    -- According to Metalua spec, {lhs} should be returned if there is no rhs.
    -- Metalua does not follow the spec itself and returns {lhs, {}}.
    return init_ast_node({lhs, rhs, equals_location = rhs and equals_location}, loc, "Local")
end

statements["::"] = function(state, loc)
    local end_column = loc.column + 1
    local name = check_name(state)

    if state.line == loc.line then
        -- Label name on the same line as opening `::`, pull token end to name end.
        end_column = state.column + #state.token_value - 1
    end

    skip_token(state) -- Skip label name.

    if state.line == loc.line then
        -- Whole label is on one line, pull token end to closing `::` end.
        end_column = state.column + 1
    end

    check_and_skip_token(state, "::")
    return init_ast_node({name, end_column = end_column}, loc, "Label")
end

-- Tokens that terminate a block.
local closing_tokens = {
    ["end"] = true, ["eof"] = true, ["else"] = true, ["elseif"] = true, ["until"] = true,
}

statements["return"] = function(state, loc)
    if closing_tokens[state.token] or state.token == ";" then
        -- No return values.
        return init_ast_node({}, loc, "Return")
    else
        return init_ast_node(parse_expression_list(state), loc, "Return")
    end
end

statements["break"] = function(_, loc)
    return init_ast_node({}, loc, "Break")
end

statements["goto"] = function(state, loc)
    local name = check_name(state)
    skip_token(state) -- Skip label name.
    return init_ast_node({name}, loc, "Goto")
end

-- Parses a statement that starts with an expression: either a call
-- statement or an assignment `lhs1, lhs2, ... = rhs...`.
local function parse_expression_statement(state, loc)
    local lhs

    repeat
        local first_loc = lhs and location(state) or loc
        local expected = lhs and "identifier or field" or "statement"
        local primary_expression, in_parens = parse_simple_expression(state, expected, true)

        if in_parens then
            -- (expr) is invalid.
            lexer.syntax_error(first_loc, first_loc.column, "expected " .. expected .. " near '('")
        end

        if primary_expression.tag == "Call" or primary_expression.tag == "Invoke" then
            if lhs then
                -- This is an assignment, and a call is not a valid lvalue.
                parse_error(state, "expected call or indexing")
            else
                -- It is a call.
                primary_expression.location = loc
                return primary_expression
            end
        end

        -- This is an assignment.
        lhs = lhs or {}
        lhs[#lhs+1] = primary_expression
    until not test_and_skip_token(state, ",")

    local equals_location = location(state)
    check_and_skip_token(state, "=")
    local rhs = parse_expression_list(state)
    return init_ast_node({lhs, rhs, equals_location = equals_location}, loc, "Set")
end

-- Parses a single statement, dispatching on the leading keyword.
local function parse_statement(state)
    local loc = location(state)
    local statement_parser = statements[state.token]

    if statement_parser then
        skip_token(state)
        return statement_parser(state, loc)
    else
        return parse_expression_statement(state, loc)
    end
end

-- Parses statements until a closing token; assigns the local
-- forward-declared earlier in the file.
function parse_block(state, loc)
    local block = {tag = "Block", location = loc}
    local after_statement = false

    while not closing_tokens[state.token] do
        local first_token = state.token

        if first_token == ";" then
            if not after_statement then
                -- A ";" with no preceding statement is recorded as hanging.
                table.insert(state.hanging_semicolons, location(state))
            end

            skip_token(state)
            -- Do not allow several semicolons in a row, even if the first one is valid.
            after_statement = false
        else
            first_token = state.token_value or first_token
            local statement = parse_statement(state)
            after_statement = true
            statement.first_token = first_token
            block[#block+1] = statement

            if first_token == "return" then
                -- "return" must be the last statement.
                -- However, one ";" after it is allowed.
                test_and_skip_token(state, ";")

                if not closing_tokens[state.token] then
                    parse_error(state, "expected end of block")
                end
            end
        end
    end

    return block
end

-- Parses source string.
-- Returns AST (in almost MetaLua format), array of comments - tables {comment = string, location = location},
-- set of line numbers containing code, and array of locations of empty statements (semicolons).
-- On error throws {line = line, column = column, end_column = end_column, msg = msg}
local function parse(src)
    local state = new_state(src)
    skip_token(state)
    local ast = parse_block(state)
    check_token(state, "eof")
    return ast, state.comments, state.code_lines, state.hanging_semicolons
end


return parse

--------------------------------------------------------------------------------
-- serialize.lua
--------------------------------------------------------------------------------
-- FIX(review): removed unused locals `tinsert` and `srep`.
local tconcat = table.concat
local tsort = table.sort
local sformat = string.format
local tostring = tostring
local type = type
local pairs = pairs

local INDENT_STR = "    "

local _table2str

-- Appends the serialized form of `v` (followed by ",\n") to result_table.
-- `n` is the current length of result_table; returns the new length.
local function _value2str(v, indent, result_table, n)
    local tpv = type(v)
    if tpv == "table" then
        n = n + 1; result_table[n] = "{\n"

        -- recursive
        n = _table2str(v, indent + 1, result_table, n)

        for _ = 1, indent do
            n = n + 1; result_table[n] = INDENT_STR
        end
        n = n + 1; result_table[n] = "},\n"
    else
        n = n + 1; result_table[n] = (tpv == "string" and sformat("%q", v) or tostring(v))
        n = n + 1; result_table[n] = ",\n"
    end

    return n
end

-- Appends the serialized body of `lua_table` to result_table.
-- Pure sequences are emitted without keys; other tables get sorted
-- "[key] = value" entries. Returns the new length of result_table.
_table2str = function(lua_table, indent, result_table, n)
    indent = indent or 0

    local keys = {}
    local x = 0
    local is_array = true
    local max_index = 0
    local expect_index = 1
    for k, _ in pairs(lua_table) do
        x = x + 1; keys[x] = k
        if is_array then
            if math.type(k) ~= 'integer' or k <= 0 then
                is_array = false
            else
                if k > max_index then
                    max_index = k
                end
                if k == expect_index then
                    expect_index = k + 1
                end
            end
        end
    end
    -- `pairs` order is unspecified, so verify there are no holes between
    -- the last key seen in order and the maximum index.
    if is_array then
        for i = expect_index, max_index do
            if lua_table[i] == nil then
                is_array = false
                break
            end
        end
    end

    -- Pure array part: emit values without keys.
    -- FIX(review): inner indent loop no longer shadows the outer index.
    if is_array then
        for i = 1, max_index do
            for _ = 1, indent do
                n = n + 1; result_table[n] = INDENT_STR
            end
            n = _value2str(lua_table[i], indent, result_table, n)
        end
        return n
    end

    -- Not a pure array: sort keys (numbers before strings) for
    -- deterministic output.
    tsort(keys, function(a, b)
        if type(a) == type(b) then
            return a < b
        end
        if type(a) == "string" then
            return false
        elseif type(b) == "string" then
            return true
        else
            return a < b
        end
    end)

    for i = 1, x do
        local k = keys[i]
        local v = lua_table[k]

        -- indent
        for _ = 1, indent do
            n = n + 1; result_table[n] = INDENT_STR
        end

        -- key
        n = n + 1; result_table[n] = "["
        n = n + 1; result_table[n] = (type(k) == "string" and sformat("%q", k) or tostring(k))
        n = n + 1; result_table[n] = "] = "

        -- value
        n = _value2str(v, indent, result_table, n)
    end

    return n
end

-- Serializes `lua_table` to a human-readable Lua-literal string.
local function serialize(lua_table)
    local _seri_table = {}
    local n = 0 -- length of _seri_table
    n = n + 1; _seri_table[n] = '{\n'
    n = _table2str(lua_table, 1, _seri_table, n)
    n = n + 1; _seri_table[n] = '}'

    return tconcat(_seri_table, '')
end

return serialize

--------------------------------------------------------------------------------
-- test.lua
--------------------------------------------------------------------------------
local parser = require "genlfunc"
local print_r = require "print_r"

-- NOTE(review): the original indentation inside this source string was lost
-- in the dump; reconstructed with a uniform 4-space indent — verify upstream.
local source = [[
function aa()
    return 33
end

function ee.cc:ff()
    return 3
end

local function foo()
    return 4
end

function foo.cc.ee ()
    local function pp()
        local function _gg ()
            local hh = function ()
            end
        end
    end
    return 4
end

local gg = function ()
    return 5
end
]]


local ret = parser(source)
print_r(ret)

--------------------------------------------------------------------------------
-- utils.lua
--------------------------------------------------------------------------------
local lfs = require "lfs"
local M = {}

-- Joins path components with the platform's directory separator
-- (first line of package.config).
local function join(...)
    local t = {...}
    local sep = string.match(package.config, "[^\n]+")
    return table.concat(t, sep)
end


-- Recursively collects {path, name} records for every *.lua file under
-- `lua_dir` into `out`.
local function _f(lua_dir, out)
    out = out or {}
    for file in lfs.dir(lua_dir) do
        if file ~= "." and file ~= ".." then
            local f = join(lua_dir, file)
            local attr = lfs.attributes(f)
            -- FIX(review): attr may be nil (permission error / race);
            -- guard before reading attr.mode.
            if attr and attr.mode == "directory" then
                _f(f, out)
            elseif string.match(file, ".+%.lua$") then
                out[#out+1] = {
                    path = f,
                    name = file,
                }
            end
        end
    end
    return out
end

-- Lists all Lua files under `lua_dir`. Returns a table keyed by path
-- relative to `lua_dir`, plus a map from relative path to {name, path}.
function M.list_all_lua(lua_dir)
    local out = _f(lua_dir)
    local ret = {}
    local map = {}
    for _, v in ipairs(out) do
        local abs_path = v.path
        -- FIX(review): gsub treated lua_dir as a Lua pattern, breaking on
        -- paths containing magic characters ("-", ".", "(", ...). All
        -- collected paths start with lua_dir, so strip the prefix directly.
        v.path = string.sub(abs_path, #lua_dir + 1)
        ret[v.path] = v
        map[v.path] = {
            name = v.name,
            path = abs_path,
        }
    end
    return ret, map
end

M.join = join

return M