├── LICENSE ├── README.md ├── __bin └── scilua.lua ├── __meta.lua ├── ast-boolean-const-eval.lua ├── ast-const-eval.lua ├── ast-validate.lua ├── bcread.lua ├── bcsave.lua ├── bytecode.lua ├── compile.lua ├── generator.lua ├── lexer.lua ├── lua-ast.lua ├── luacode-generator.lua ├── operator.lua ├── parser.lua ├── reader.lua ├── transform.lua └── util.lua /LICENSE: -------------------------------------------------------------------------------- 1 | =============================================================================== 2 | 3 | Sci-Lang: Syntax extensions to LuaJIT for scientific computing. 4 | 5 | Copyright (C) 2015-2016 Stefano Peluchetti. All rights reserved. 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 
24 | 25 | [ MIT license: http://opensource.org/licenses/MIT ] 26 | 27 | =============================================================================== 28 | 29 | LuaJIT Language Toolkit, a toolkit for language implementations. 30 | 31 | Copyright (C) 2013-2014 Francesco Abbate. All rights reserved. 32 | 33 | [ MIT license: http://www.opensource.org/licenses/mit-license.php ] 34 | 35 | Based on Nyanga's language implementation of Richard Hundt. Copyright 36 | license of Nyanga's original work: 37 | 38 | =============================================================================== 39 | 40 | Nyanga -- Modifiable OO Lua Dialect. http://github.com/richardhundt/nyanga 41 | 42 | Copyright (C) 2013-2014 Richard Hundt and contributors. All rights reserved. 43 | 44 | Permission is hereby granted, free of charge, to any person obtaining a copy 45 | of this software and associated documentation files (the "Software"), to deal 46 | in the Software without restriction, including without limitation the rights 47 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 48 | copies of the Software, and to permit persons to whom the Software is 49 | furnished to do so, subject to the following conditions: 50 | 51 | The above copyright notice and this permission notice shall be included in 52 | all copies or substantial portions of the Software. 53 | 54 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 55 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 56 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 57 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 58 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 59 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 60 | THE SOFTWARE. 
61 | 62 | [ MIT license: http://www.opensource.org/licenses/mit-license.php ] 63 | 64 | =============================================================================== 65 | 66 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | SciLua-Lang: Syntax extensions to LuaJIT for scientific computing 2 | ================================================================= 3 | 4 | Based on the [LuaJIT Language Toolkit](https://github.com/franko/luajit-lang-toolkit) this executable introduces extensions to the LuaJIT syntax for algebra operations. 5 | 6 | ## Features 7 | 8 | - algebra expressions constructed via empty bracket `[]` indexing 9 | - element-wise operations via plain Lua operators (`+-*/^%`) 10 | - matrix multiplication via `**` 11 | - matrix exponentiation via `^^` 12 | - transposition via `` ` `` 13 | - efficient implementation minimizes required allocations and loops 14 | - support for assignments 15 | 16 | ```lua 17 | -- Replicate rand_mat_stat from Julia's benchmark suite: 18 | local function randmatstat(t) 19 | local n = 5 20 | local v, w = alg.vec(t), alg.vec(t) 21 | for i=1,t do 22 | local a, b, c, d = randn(n, n), randn(n, n), randn(n, n), randn(n, n) 23 | local P = alg.join(a..b..c..d) 24 | local Q = alg.join(a..b, c..d) 25 | v[i] = alg.trace((P[]`**P[])^^4) -- Matrix transpose, product and power. 26 | w[i] = alg.trace((Q[]`**Q[])^^4) -- Matrix transpose, product and power. 
-- Command-line driver for sci-lang: translates a source file to Lua code
-- and runs it, or hands over to the bytecode saver when "-b" is given.

-- Set JIT default optimizations for sci.
jit.opt.start('callunroll=10', 'loopunroll=30')

-- Print the usage banner on stderr and terminate with exit status 1.
local function usage()
    io.stderr:write[[
LuaJIT Language Toolkit usage: luajit [options]... [script [args]...].

Available options are:
  -b ...    Save or list bytecode.
  -c ...    Generate Lua code and run.
            If followed by the "v" option the generated Lua code
            will be printed.
]]
    os.exit(1)
end

-- Unwrap a (success, result) pair: on failure print `result` on stderr and
-- exit with status 1, otherwise return `result`.
local function check(success, result)
    if not success then
        -- tostring() so a non-string error value cannot break the report.
        io.stderr:write(tostring(result) .. "\n")
        os.exit(1)
    else
        return result
    end
end

local filename

local args = {...}
local opt = {}
local k = 1
while args[k] do
    local a = args[k]
    if string.sub(a, 1, 1) == "-" then
        if string.sub(a, 2, 2) == "b" then
            -- Forward everything from "-b" onwards to the bytecode saver.
            -- Letters glued to "-b" (e.g. "-bl") are passed on as their own
            -- option ("-l"); a bare "-b" is dropped.  The list is built from
            -- position k, so options that precede "-b" (already consumed by
            -- this loop) are not forwarded and cannot corrupt the arguments.
            local bcargs = {}
            if #a > 2 then
                bcargs[1] = "-" .. string.sub(a, 3)
            end
            for i = k + 1, #args do
                bcargs[#bcargs + 1] = args[i]
            end
            require("sci-lang.bcsave").start(unpack(bcargs))
            os.exit(0)
        elseif string.sub(a, 2, 2) == "c" then
            opt.code = true
            local copt = string.sub(a, 3, 3)
            if copt == "v" then
                opt.debug = true
            elseif copt ~= "" then
                print("Invalid Lua code option: ", copt)
                usage()
            end
        elseif string.sub(a, 2, 2) == "v" then
            opt.debug = true
        else
            print("Invalid option: ", args[k])
            usage()
        end
    else
        -- NOTE(review): every non-option argument overwrites `filename`, so
        -- the last one wins and script arguments are not passed on to the
        -- script, although the usage text advertises "[script [args]...]".
        filename = args[k]
    end
    k = k + 1
end

if not filename then usage() end

local compile = require("sci-lang.compile")

-- Translate the given file.  compile.file is used here as returning a
-- (success, result) pair; on success the result is a chunk accepted by
-- loadstring.
local luacode = check(compile.file(filename, opt))
if opt.debug then
    print(luacode)
    print('\n\nOutput:')
end
local fn = assert(loadstring(luacode))
fn()
-- Rules, keyed by AST node kind, used to fold boolean constants.
local BoolConstRule = { }

-- const_eval(node) returns a boolean when the AST node evaluates to a
-- boolean constant, or nil otherwise.  Declared ahead of the rules because
-- they recurse through it; the implementation is at the bottom.
local const_eval

-- Folding functions for the operators that can be evaluated directly.
local DIRECT_OPS = {
    ['and'] = function(lhs, rhs) return lhs and rhs end,
    ['or'] = function(lhs, rhs) return lhs or rhs end,
}

-- Apply operator `o` to two already-folded operands; yields nothing for
-- any operator other than 'and' / 'or'.
local function dirop_compute(o, a, b)
    local fold = DIRECT_OPS[o]
    if fold then
        return fold(a, b)
    end
end

-- A literal is a boolean constant only when its value is a boolean.
function BoolConstRule.Literal(node)
    local value = node.value
    if type(value) ~= 'boolean' then return end
    return value
end

-- Fold a binary node when both operands are boolean constants.
-- NOTE(review): the AST schema in this project lists 'and' / 'or' under
-- LogicalExpression, not BinaryExpression -- confirm which kind the parser
-- emits for them.
function BoolConstRule.BinaryExpression(node)
    local lhs = const_eval(node.left)
    if lhs == nil then return end
    local rhs = const_eval(node.right)
    if rhs == nil then return end
    return dirop_compute(node.operator, lhs, rhs)
end

-- Only 'not' applied to a boolean constant can be folded.
function BoolConstRule.UnaryExpression(node)
    if node.operator ~= 'not' then return end
    local operand = const_eval(node.argument)
    if operand == nil then return end
    return not operand
end

function const_eval(node)
    local rule = BoolConstRule[node.kind]
    if not rule then return end
    return rule(node)
end
-- Rules, keyed by AST node kind, used to fold arithmetic constants.
local ConstRule = { }

-- const_eval(node) returns a number when the AST node evaluates to an
-- arithmetic constant, or nil otherwise.  Declared ahead of the rules
-- because they recurse through it; the implementation is at the bottom.
local const_eval

-- Apply arithmetic operator `o` to two already-folded numbers.
-- Returns the folded number, or nil when the expression must be left for
-- run time: unknown operator, or a result that is NaN.  The original code
-- refused to fold 0/0 only; the same reasoning applies to every other way
-- of producing NaN (x % 0, inf - inf, (-1)^0.5, ...), so the result itself
-- is checked.
local function dirop_compute(o, a, b)
    local r
    if o == '+' then
        r = a + b
    elseif o == '-' then
        r = a - b
    elseif o == '*' then
        r = a * b
    elseif o == '/' then
        if a == 0 and b == 0 then return nil end
        r = a / b
    elseif o == '%' then
        -- Test the divisor first: depending on the Lua version, modulo by
        -- zero yields NaN or raises an error.
        if b == 0 then return nil end
        r = a % b
    elseif o == '^' then
        r = a ^ b
    else
        return
    end
    -- NaN is the only value that differs from itself.
    if r ~= r then return nil end
    return r
end

-- A literal is an arithmetic constant only when its value is a number.
function ConstRule.Literal(node)
    local v = node.value
    if type(v) == 'number' then return v end
end

-- Fold a binary expression when both operands are arithmetic constants.
function ConstRule.BinaryExpression(node)
    local o = node.operator
    local a = const_eval(node.left)
    if a then
        local b = const_eval(node.right)
        if b then
            return dirop_compute(o, a, b)
        end
    end
end

-- Only unary minus applied to an arithmetic constant can be folded.
function ConstRule.UnaryExpression(node)
    local o = node.operator
    if o == '-' then
        local v = const_eval(node.argument)
        if v then return -v end
    end
end

function const_eval(node)
    local rule = ConstRule[node.kind]
    if rule then
        return rule(node)
    end
end
--
-- Each entry of "syntax" describes a node of the AST.
-- The "properties" field gives the specification for the properties
-- of each node.
--
-- Each "properties" entry is of the form:
--
--   <name> = <type>
--
-- where <type> is a recursive type defined as follows. It can be a string:
--
--   "Expression",
--   "Statement",
--   ...
--
-- to indicate a specific kind of "node". Alternatively a node can be
-- specified as:
--
--   { type = "node", kind = "Statement" }
--
-- In addition a <type> can also be:
--
--   { type = "literal", value = "string" }
--
--   { type = "enum", values = {"a", "b", "c"} }
--
--   { type = "list", kind = <type> }
--
--   { type = "choice", values = {<type>, <type>, ...} }
--
-- The latter two are defined recursively. A "list" is a Lua table of elements
-- of a given type. A "choice" allows an element to be of either one type or
-- another.
--

local syntax = {
    Node = {
        kind = "Node",
        abstract = true
    },
    Expression = {
        kind = "Expression",
        base = "Node",
        abstract = true,
    },
    Statement = {
        kind = "Statement",
        base = "Node",
        abstract = true,
    },
    Chunk = {
        kind = "Chunk",
        base = "Node",
        properties = {
            body = { type = "list", kind = "Statement" },
            chunkname = { type = "literal", value = "string" },
        }
    },
    Identifier = {
        kind = "Identifier",
        base = "Expression",
        properties = {
            name = { type = "literal", value = "string" },
        }
    },
    Vararg = {
        kind = "Vararg",
        base = "Identifier",
        properties = { }
    },
    BinaryExpression = {
        kind = "BinaryExpression",
        base = "Expression",
        properties = {
            operator = {
                type = "enum",
                values = {
                    "+", "-", "*", "/", "^", "%",
                    "==", "~=", ">=", ">", "<=", "<",
                }
            },
            left = "Expression",
            right = "Expression",
        }
    },
    BinaryAlgebraExpression = {
        kind = "BinaryAlgebraExpression",
        base = "BinaryExpression",
        properties = {
            operator = {
                type = "enum",
                values = {
                    "+", "-", "*", "/", "^", "%",
                    "==", "~=", ">=", ">", "<=", "<",
                    "**", "^^",
                }
            },
            left = "Expression",
            right = "Expression",
        }
    },
    ConcatenateExpression = {
        kind = "ConcatenateExpression",
        base = "Expression",
        properties = {
            terms = { type = "list", kind = "Expression" },
        }
    },
    UnaryExpression = {
        kind = "UnaryExpression",
        base = "Expression",
        properties = {
            operator = {
                type = "enum",
                values = { "not", "-", "#" },
            },
            argument = "Expression",
        }
    },
    UnaryAlgebraExpression = {
        kind = "UnaryAlgebraExpression",
        base = "UnaryExpression",
        properties = {
            operator = {
                type = "enum",
                values = { "not", "-", "#", "`" },
            },
            argument = "Expression",
        }
    },
    ExpressionValue = {
        kind = "ExpressionValue",
        base = "Expression",
        properties = {
            value = "Expression",
        }
    },
    AssignmentExpression = {
        kind = "AssignmentExpression",
        base = "Statement",
        properties = {
            left = {
                type = "list",
                kind = { type = "choice", values = { "MemberExpression", "Identifier" } },
            },
            right = { type = "list", kind = "Expression" },
        }
    },
    AssignmentAlgebraExpression = {
        kind = "AssignmentAlgebraExpression",
        base = "AssignmentExpression",
        properties = {
            left = {
                type = "list",
                kind = { type = "choice", values = { "MemberExpression", "Identifier" } },
            },
            right = { type = "list", kind = "Expression" },
        }
    },
    LogicalExpression = {
        kind = "LogicalExpression",
        base = "Expression",
        properties = {
            operator = {
                type = "enum",
                values = { "and", "or" }
            },
            left = "Expression",
            right = "Expression",
        }
    },
    MemberExpression = {
        kind = "MemberExpression",
        base = "Expression",
        properties = {
            object = "Expression",
            property = "Expression",
            computed = { type = "literal", value = "boolean", default = false },
        }
    },
    IndexAlgebraExpression = {
        kind = "IndexAlgebraExpression",
        base = "MemberExpression",
        properties = {
            object = "Expression",
        }
    },
    CallExpression = {
        kind = "CallExpression",
        base = "Expression",
        properties = {
            callee = "Expression",
            arguments = { type = "list", kind = "Expression" },
        }
    },
    SendExpression = {
        kind = "SendExpression",
        base = "Expression",
        properties = {
            receiver = "Expression",
            method = "Identifier",
            arguments = { type = "list", kind = "Expression" },
        }
    },
    Literal = {
        kind = "Literal",
        base = "Expression",
        properties = {
            value = {
                type = "choice",
                values = {
                    { type = "literal", value = "string" },
                    { type = "literal", value = "number" },
                    { type = "literal", value = "nil" },
                    { type = "literal", value = "boolean" },
                    { type = "literal", value = "cdata" },
                }
            },
        }
    },
    Table = {
        kind = "Table",
        base = "Expression",
        properties = {
            array_entries = { type = "list", kind = "Expression" },
            hash_keys = { type = "list", kind = "Expression" },
            hash_values = { type = "list", kind = "Expression" },
        }
    },
    ExpressionStatement = {
        kind = "ExpressionStatement",
        base = "Statement",
        properties = {
            expression = {
                type = "choice",
                values = { "Statement", "Expression" },
            }
        }
    },
    EmptyStatement = {
        kind = "EmptyStatement",
        base = "Statement",
        properties = { },
    },
    DoStatement = {
        kind = "DoStatement",
        base = "Statement",
        properties = {
            body = { type = "list", kind = "Statement" },
        }
    },
    IfStatement = {
        kind = "IfStatement",
        base = "Statement",
        properties = {
            tests = { type = "list", kind = "Expression" },
            cons = {
                type = "list",
                kind = { type = "list", kind = "Statement" },
            },
            alternate = { type = "list", kind = "Statement", optional = true },
        }
    },
    LabelStatement = {
        kind = "LabelStatement",
        base = "Statement",
        properties = {
            label = { type = "literal", value = "string" },
        }
    },
    GotoStatement = {
        kind = "GotoStatement",
        base = "Statement",
        properties = {
            label = { type = "literal", value = "string" }
        }
    },
    BreakStatement = {
        kind = "BreakStatement",
        base = "Statement",
        properties = { },
    },
    ReturnStatement = {
        kind = "ReturnStatement",
        base = "Statement",
        properties = {
            arguments = { type = "list", kind = "Expression" },
        }
    },
    WhileStatement = {
        kind = "WhileStatement",
        base = "Statement",
        properties = {
            test = "Expression",
            body = { type = "list", kind = "Statement" },
        }
    },
    RepeatStatement = {
        kind = "RepeatStatement",
        base = "Statement",
        properties = {
            test = "Expression",
            body = { type = "list", kind = "Statement" },
        }
    },
    ForInit = {
        kind = "ForInit",
        base = "Expression",
        properties = {
            id = "Identifier",
            value = "Expression",
        }
    },
    ForStatement = {
        kind = "ForStatement",
        base = "Statement",
        properties = {
            init = "ForInit",
            last = "Expression",
            step = { type = "node", kind = "Expression", optional = true },
            body = { type = "list", kind = "Statement" },
        }
    },
    ForNames = {
        kind = "ForNames",
        base = "Expression",
        properties = {
            names = { type = "list", kind = "Identifier" },
        }
    },
    ForInStatement = {
        kind = "ForInStatement",
        base = "Statement",
        properties = {
            namelist = "ForNames",
            explist = { type = "list", kind = "Expression" },
            body = { type = "list", kind = "Statement" },
        }
    },
    LocalDeclaration = {
        kind = "LocalDeclaration",
        base = "Statement",
        properties = {
            names = { type = "list", kind = "Identifier" },
            expressions = { type = "list", kind = "Expression" },
        }
    },
    FunctionDeclaration = {
        kind = "FunctionDeclaration",
        base = "Statement",
        properties = {
            id = {
                type = "choice",
                values = { "MemberExpression", "Identifier" },
            },
            body = { type = "list", kind = "Statement" },
            params = { type = "list", kind = "Identifier" },
            vararg = { type = "literal", value = "boolean", default = false },
            locald = { type = "literal", value = "boolean", default = false },
        }
    },
    FunctionExpression = {
        kind = "FunctionExpression",
        base = "Expression",
        properties = {
            body = { type = "list", kind = "Statement" },
            params = { type = "list", kind = "Identifier" },
            vararg = { type = "literal", value = "boolean", default = false },
        }
    }
}

-- check(spec, prop) dispatches on the kind of spec; declared ahead because
-- the list and choice checkers recurse through it.
local check

-- True when `prop` is a table whose kind is `tag` or derives from it through
-- the "base" chain of the syntax table.
local function iskind(prop, tag)
    if type(prop) ~= "table" then
        return false
    end
    local meta = syntax[prop.kind]
    while meta do
        if meta.kind == tag then
            return true
        end
        meta = syntax[meta.base]
    end
    return false
end

-- True when `prop` is an AST node of any known kind.
local function isnode(prop)
    return iskind(prop, "Node")
end

-- Human-readable rendering of a type specification, for error messages.
local function kind2str(spec)
    if type(spec) == "string" then
        return spec
    elseif spec.type == "node" then
        return spec.kind
    elseif spec.type == "list" then
        return "list of " .. kind2str(spec.kind)
    elseif spec.type == "enum" then
        return table.concat(spec.values, ", ")
    elseif spec.type == "literal" then
        return "literal " .. spec.value
    elseif spec.type == "choice" then
        local ls = {}
        for i = 1, #spec.values do ls[i] = kind2str(spec.values[i]) end
        return table.concat(ls, "|")
    else
        error("internal error: invalid spec type")
    end
end

-- Short description of an offending value: the node kind when the value
-- carries one, the Lua type name otherwise.  Never fails, whatever it is
-- given.
local function describe(prop)
    if type(prop) == "table" and prop.kind ~= nil then
        return tostring(prop.kind)
    end
    return type(prop)
end

-- Each check_* function returns true on success, or false plus a message.

local function check_node(tag, prop)
    if not isnode(prop) then
        return false, "expected Node"
    end
    if not iskind(prop, tag) then
        return false, "expected " .. tag
    end
    return true
end

local function check_list(spec, prop)
    if type(prop) ~= "table" then
        return false, "expected list of "..kind2str(spec.kind).." (got "..type(prop)..")"
    end
    if isnode(prop) then
        return false, "expected list of "..kind2str(spec.kind).." (got node)"
    end
    for i=1, #prop do
        local ok, err = check(spec.kind, prop[i])
        if not ok then
            -- The element may be a plain value or a nested list with no
            -- "kind" field; describe() keeps this error path from raising
            -- an unrelated error of its own.
            return false, err.." (got "..describe(prop[i])..")"
        end
    end
    return true
end

local function check_enum(spec, prop)
    for i=1, #spec.values do
        if prop == spec.values[i] then return true end
    end
    return false, "expected one of "..kind2str(spec).." (got '"..tostring(prop).."')"
end

local function check_literal(spec, prop)
    assert(type(spec.value) == "string")
    if type(prop) ~= spec.value then
        return false, "expected "..spec.value.." (got "..type(prop)..")"
    end
    return true
end

local function check_choice(spec, prop)
    for i = 1, #spec.values do
        if check(spec.values[i], prop) then
            return true
        end
    end
    return false, "expected one of "..kind2str(spec).." (got '"..tostring(prop).."')"
end

function check(spec, prop)
    if type(spec) == "string" then
        return check_node(spec, prop)
    elseif spec.type == "node" then
        return check_node(spec.kind, prop)
    elseif spec.type == "list" then
        return check_list(spec, prop)
    elseif spec.type == "enum" then
        return check_enum(spec, prop)
    elseif spec.type == "literal" then
        return check_literal(spec, prop)
    elseif spec.type == "choice" then
        return check_choice(spec, prop)
    else
        error("internal error: invalid spec type")
    end
end

-- Validate `node` against the syntax entry `meta`: fill in declared
-- defaults, check every declared property and raise an error on the first
-- mismatch.  Returns the node itself.
local function validate(meta, node)
    if meta == nil then
        error("unknown node kind: "..tostring(node.kind))
    end
    for name, spec in pairs(meta.properties) do
        -- `spec` may be a plain string such as "Expression"; indexing a
        -- string for "default" / "optional" simply yields nil.
        if node[name] == nil and type(spec.default) ~= 'nil' then
            node[name] = spec.default
        end
        local prop = node[name]
        if prop ~= nil or not spec.optional then
            local ok, er = check(spec, prop)
            if not ok then
                error(er.." for "..(node.kind or "?").."."..name)
            end
        end
    end
    return node
end

-- Turn the table `props` into a validated AST node of the given kind.
-- `props` is modified in place (its "kind" field and missing defaults are
-- set) and returned.
local function build(kind, props)
    local meta = syntax[kind]
    props.kind = kind
    return validate(meta, props)
end
5 | -- 6 | -- Major portions taken verbatim or adapted from 7 | -- LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/ 8 | -- Copyright (C) 2005-2014 Mike Pall. All rights reserved. 9 | -- 10 | -- See Copyright Notice in LICENSE 11 | -- 12 | 13 | local ffi = require("ffi") 14 | 15 | local band, bor, shl, shr, bnot = bit.band, bit.bor, bit.lshift, bit.rshift, bit.bnot 16 | local strsub, strbyte, strchar, format, gsub = string.sub, string.byte, string.char, string.format, string.gsub 17 | 18 | local BCDUMP = { 19 | HEAD1 = 0x1b, 20 | HEAD2 = 0x4c, 21 | HEAD3 = 0x4a, 22 | 23 | -- If you perform *any* kind of private modifications to the bytecode itself 24 | -- or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher. 25 | VERSION = 1, 26 | 27 | -- Compatibility flags. 28 | F_BE = 0x01, 29 | F_STRIP = 0x02, 30 | F_FFI = 0x04, 31 | } 32 | 33 | BCDUMP.F_KNOWN = BCDUMP.F_FFI*2-1 34 | 35 | local BCDUMP_KGC_CHILD, BCDUMP_KGC_TAB, BCDUMP_KGC_I64, BCDUMP_KGC_U64, BCDUMP_KGC_COMPLEX, BCDUMP_KGC_STR = 0, 1, 2, 3, 4, 5 36 | local BCDUMP_KTAB_NIL, BCDUMP_KTAB_FALSE, BCDUMP_KTAB_TRUE, BCDUMP_KTAB_INT, BCDUMP_KTAB_NUM, BCDUMP_KTAB_STR = 0, 1, 2, 3, 4, 5 37 | 38 | local BCM_REF = { 39 | 'none', 'dst', 'base', 'var', 'rbase', 'uv', -- Mode A must be <= 7 40 | 'lit', 'lits', 'pri', 'num', 'str', 'tab', 'func', 'jump', 'cdata' 41 | } 42 | 43 | local BCDEF_TAB = { 44 | {'ISLT', 'var', 'none', 'var', 'lt'}, 45 | {'ISGE', 'var', 'none', 'var', 'lt'}, 46 | {'ISLE', 'var', 'none', 'var', 'le'}, 47 | {'ISGT', 'var', 'none', 'var', 'le'}, 48 | 49 | {'ISEQV', 'var', 'none', 'var', 'eq'}, 50 | {'ISNEV', 'var', 'none', 'var', 'eq'}, 51 | {'ISEQS', 'var', 'none', 'str', 'eq'}, 52 | {'ISNES', 'var', 'none', 'str', 'eq'}, 53 | {'ISEQN', 'var', 'none', 'num', 'eq'}, 54 | {'ISNEN', 'var', 'none', 'num', 'eq'}, 55 | {'ISEQP', 'var', 'none', 'pri', 'eq'}, 56 | {'ISNEP', 'var', 'none', 'pri', 'eq'}, 57 | 58 | -- Unary test and copy ops. 
59 | {'ISTC', 'dst', 'none', 'var', 'none'}, 60 | {'ISFC', 'dst', 'none', 'var', 'none'}, 61 | {'IST', 'none', 'none', 'var', 'none'}, 62 | {'ISF', 'none', 'none', 'var', 'none'}, 63 | 64 | -- Unary ops. 65 | {'MOV', 'dst', 'none', 'var', 'none'}, 66 | {'NOT', 'dst', 'none', 'var', 'none'}, 67 | {'UNM', 'dst', 'none', 'var', 'unm'}, 68 | {'LEN', 'dst', 'none', 'var', 'len'}, 69 | 70 | -- Binary ops. ORDER OPR. VV last, POW must be next. 71 | {'ADDVN', 'dst', 'var', 'num', 'add'}, 72 | {'SUBVN', 'dst', 'var', 'num', 'sub'}, 73 | {'MULVN', 'dst', 'var', 'num', 'mul'}, 74 | {'DIVVN', 'dst', 'var', 'num', 'div'}, 75 | {'MODVN', 'dst', 'var', 'num', 'mod'}, 76 | 77 | {'ADDNV', 'dst', 'var', 'num', 'add'}, 78 | {'SUBNV', 'dst', 'var', 'num', 'sub'}, 79 | {'MULNV', 'dst', 'var', 'num', 'mul'}, 80 | {'DIVNV', 'dst', 'var', 'num', 'div'}, 81 | {'MODNV', 'dst', 'var', 'num', 'mod'}, 82 | 83 | {'ADDVV', 'dst', 'var', 'var', 'add'}, 84 | {'SUBVV', 'dst', 'var', 'var', 'sub'}, 85 | {'MULVV', 'dst', 'var', 'var', 'mul'}, 86 | {'DIVVV', 'dst', 'var', 'var', 'div'}, 87 | {'MODVV', 'dst', 'var', 'var', 'mod'}, 88 | 89 | {'POW', 'dst', 'var', 'var', 'pow'}, 90 | {'CAT', 'dst', 'rbase', 'rbase', 'concat'}, 91 | 92 | -- Constant ops. 93 | {'KSTR', 'dst', 'none', 'str', 'none'}, 94 | {'KCDATA', 'dst', 'none', 'cdata', 'none'}, 95 | {'KSHORT', 'dst', 'none', 'lits', 'none'}, 96 | {'KNUM', 'dst', 'none', 'num', 'none'}, 97 | {'KPRI', 'dst', 'none', 'pri', 'none'}, 98 | {'KNIL', 'base', 'none', 'base', 'none'}, 99 | 100 | -- Upvalue and function ops. 101 | {'UGET', 'dst', 'none', 'uv', 'none'}, 102 | {'USETV', 'uv', 'none', 'var', 'none'}, 103 | {'USETS', 'uv', 'none', 'str', 'none'}, 104 | {'USETN', 'uv', 'none', 'num', 'none'}, 105 | {'USETP', 'uv', 'none', 'pri', 'none'}, 106 | {'UCLO', 'rbase', 'none', 'jump', 'none'}, 107 | {'FNEW', 'dst', 'none', 'func', 'gc'}, 108 | 109 | -- Table ops. 
110 | {'TNEW', 'dst', 'none', 'lit', 'gc'}, 111 | {'TDUP', 'dst', 'none', 'tab', 'gc'}, 112 | {'GGET', 'dst', 'none', 'str', 'index'}, 113 | {'GSET', 'var', 'none', 'str', 'newindex'}, 114 | {'TGETV', 'dst', 'var', 'var', 'index'}, 115 | {'TGETS', 'dst', 'var', 'str', 'index'}, 116 | {'TGETB', 'dst', 'var', 'lit', 'index'}, 117 | {'TSETV', 'var', 'var', 'var', 'newindex'}, 118 | {'TSETS', 'var', 'var', 'str', 'newindex'}, 119 | {'TSETB', 'var', 'var', 'lit', 'newindex'}, 120 | {'TSETM', 'base', 'none', 'num', 'newindex'}, 121 | 122 | -- Calls and vararg handling. T = tail call. 123 | {'CALLM', 'base', 'lit', 'lit', 'call'}, 124 | {'CALL', 'base', 'lit', 'lit', 'call'}, 125 | {'CALLMT', 'base', 'none', 'lit', 'call'}, 126 | {'CALLT', 'base', 'none', 'lit', 'call'}, 127 | {'ITERC', 'base', 'lit', 'lit', 'call'}, 128 | {'ITERN', 'base', 'lit', 'lit', 'call'}, 129 | {'VARG', 'base', 'lit', 'lit', 'none'}, 130 | {'ISNEXT', 'base', 'none', 'jump', 'none'}, 131 | 132 | -- Returns. 133 | {'RETM', 'base', 'none', 'lit', 'none'}, 134 | {'RET', 'rbase', 'none', 'lit', 'none'}, 135 | {'RET0', 'rbase', 'none', 'lit', 'none'}, 136 | {'RET1', 'rbase', 'none', 'lit', 'none'}, 137 | 138 | -- Loops and branches. I/J = interp/JIT, I/C/L = init/call/loop. 139 | {'FORI', 'base', 'none', 'jump', 'none'}, 140 | {'JFORI', 'base', 'none', 'jump', 'none'}, 141 | 142 | {'FORL', 'base', 'none', 'jump', 'none'}, 143 | {'IFORL', 'base', 'none', 'jump', 'none'}, 144 | {'JFORL', 'base', 'none', 'lit', 'none'}, 145 | 146 | {'ITERL', 'base', 'none', 'jump', 'none'}, 147 | {'IITERL', 'base', 'none', 'jump', 'none'}, 148 | {'JITERL', 'base', 'none', 'lit', 'none'}, 149 | 150 | {'LOOP', 'rbase', 'none', 'jump', 'none'}, 151 | {'ILOOP', 'rbase', 'none', 'jump', 'none'}, 152 | {'JLOOP', 'rbase', 'none', 'lit', 'none'}, 153 | 154 | {'JMP', 'rbase', 'none', 'jump', 'none'}, 155 | 156 | -- Function headers. I/J = interp/JIT, F/V/C = fixarg/vararg/C func. 
-- Lookup tables filled in by BCDEF_EVAL, both keyed by 0-based opcode
-- number: BC holds the opcode name, BCMODE the packed operand modes.
local BC, BCMODE = {}, {}

-- 0-based position of the operand mode `name` within BCM_REF; yields
-- nothing when the name is not listed.
local function BCM(name)
    local idx = 1
    while idx <= #BCM_REF do
        if BCM_REF[idx] == name then
            return idx - 1
        end
        idx = idx + 1
    end
end

-- Walk BCDEF_TAB and populate BC / BCMODE.  Operand modes are packed as
-- A in the low bits, B shifted left by 3 and C shifted left by 7.
local function BCDEF_EVAL()
    for opcode = 0, #BCDEF_TAB - 1 do
        local def = BCDEF_TAB[opcode + 1]
        local mode_a = BCM(def[2])
        local mode_b = BCM(def[3])
        local mode_c = BCM(def[4])
        BC[opcode] = def[1]
        BCMODE[opcode] = bor(mode_a, shl(mode_b, 3), shl(mode_c, 7))
    end
end

BCDEF_EVAL()
-- Render the prototype flag bits as "NAME|NAME|..." using the names in
-- PROTO_REF, or "None" when no known bit is set.
--   flags : integer bit set read from the prototype header
-- pairs() visits PROTO_REF in an unspecified order, so the matching entries
-- are collected first and then sorted by mask value; this keeps the output
-- stable from run to run.
local function proto_flags_string(flags)
    local hits = {}
    for name, mask in pairs(PROTO_REF) do
        if band(flags, mask) ~= 0 then
            hits[#hits + 1] = { name = name, mask = mask }
        end
    end
    if #hits == 0 then
        return "None"
    end
    table.sort(hits, function(a, b) return a.mask < b.mask end)
    local names = {}
    for i = 1, #hits do
        names[i] = hits[i].name
    end
    return table.concat(names, "|")
end
-- Read a NUL-terminated, non-empty string starting at the current position.
--   ls : reader state (ls.data is the dump, ls.p the 1-based cursor, ls.n the
--        number of bytes left from the cursor)
-- Returns the string without its terminator and moves the cursor past the
-- terminator. Raises "corrupted bytecode" when the string is empty or when no
-- terminator exists within the remaining bytes.
local function bcread_string(ls)
    local first = ls.p
    -- Last readable position. The scan must stop here: past the end of the
    -- data byte() yields nil, which never compares equal to 0.
    local last = first + ls.n - 1
    local p = first
    while p <= last and byte(ls, p) ~= 0 do
        p = p + 1
    end
    assert(p <= last and p > first, "corrupted bytecode")
    local s = strsub(ls.data, first, p - 1)
    -- Account for the characters plus the terminating NUL.
    bcread_consume(ls, p - first + 1)
    ls.p = p + 1
    return s
end
-- Replacement callback for gsub(str, "%c", ctlsub): turn one control
-- character into a printable escape sequence.
--   c : the single matched character
-- Returns "\n", "\r" or "\t" spelled out as two characters, otherwise a
-- backslash followed by the three-digit decimal character code.
local function ctlsub(c)
    if c == "\n" then
        return "\\n"
    elseif c == "\r" then
        return "\\r"
    elseif c == "\t" then
        return "\\t"
    end
    -- Use strbyte directly: the file-local byte() helper expects a reader
    -- state table, not a string.
    return format("\\%03d", strbyte(c))
end
-- Format one bytecode instruction as a listing line.
--   proto  : prototype info; proto.kgc, proto.knum and proto.uvinfo are read
--   pc     : instruction index, printed in the first column
--   ins    : 32-bit instruction word (opcode in bits 0-7, A in bits 8-15,
--            D in bits 16-31, or C in bits 16-23 with B in bits 24-31)
--   m      : operand mode word as stored in BCMODE (A mode in bits 0-2,
--            B mode in bits 3-6, C mode in bits 7-10)
--   prefix : optional marker printed after pc
-- Returns the formatted line.
local function bcline(proto, pc, ins, m, prefix)
    local ma, mb, mc = band(m, 7), band(m, 15*8), band(m, 15*128)
    local a = band(shr(ins, 8), 0xff)
    local op = BC[band(ins, 0xff)]
    local s = format("%04d %s %-6s %3s ", pc, prefix or " ", op, ma == 0 and "" or a)
    local d = shr(ins, 16)
    if mc == 13*128 then -- BCMjump
        return format("%s=> %04d", s, pc+d-0x7fff)
    end
    if mb ~= 0 then
        -- Three-operand form: only the low byte of D is the C operand.
        d = band(d, 0xff)
    elseif mc == 0 then
        return s
    end
    local kc
    if mc == 10*128 then -- BCMstr
        local kgc = proto.kgc
        kc = kgc[#kgc - d]
        kc = format(#kc > 40 and '"%.40s"~' or '"%s"', gsub(kc, "%c", ctlsub))
    elseif mc == 9*128 then -- BCMnum
        kc = proto.knum[d+1]
        -- BC holds the bare opcode names from BCDEF_TAB (no padding), so the
        -- comparison must not include a trailing space.
        if op == "TSETM" then kc = kc - 2^52 end
    elseif mc == 12*128 then -- BCMfunc
        local f = proto.kgc[#proto.kgc - d]
        kc = format("%s:%d", f.filename, f.firstline)
    elseif mc == 5*128 then -- BCMuv
        kc = proto.uvinfo[d+1]
    end
    if ma == 5 then -- BCMuv
        -- The upvalue name is absent when proto.uvinfo was never filled in;
        -- in that case keep whatever comment the C/D operand produced.
        local ka = proto.uvinfo[a+1]
        if ka and kc then
            kc = ka.." ; "..kc
        elseif ka then
            kc = ka
        end
    end
    if mb ~= 0 then
        local b = shr(ins, 24)
        if kc then return format("%s%3d %3d ; %s", s, b, d, kc) end
        return format("%s%3d %3d", s, b, d)
    end
    if kc then return format("%s%3d ; %s", s, d, kc) end
    if mc == 7*128 and d > 32767 then d = d - 65536 end -- BCMlits
    return format("%s%3d", s, d)
end
-- Read one template table constant: the two size fields, then every array
-- entry, then every hash entry as a key followed by its value.
--   ls     : reader state
--   target : receiver of the "ktab_dim" action (entries are reported by
--            bcread_ktabk)
-- Always returns -1; the table contents are not materialised here.
local function bcread_ktab(ls, target)
    local array_count = bcread_uleb128(ls)
    local hash_count = bcread_uleb128(ls)
    action(target, "ktab_dim", ls, array_count, hash_count)

    -- Array part: one entry per slot.
    local left = array_count
    while left > 0 do
        bcread_ktabk(ls, target)
        left = left - 1
    end

    -- Hash part: two entries (key, value) per slot.
    left = hash_count
    while left > 0 do
        bcread_ktabk(ls, target)
        bcread_ktabk(ls, target)
        left = left - 1
    end

    return -1
end
-- Read the per-instruction line table and report every entry through the
-- "lineinfo" action.
--   ls        : reader state
--   target    : receiver of the "lineinfo" action
--   firstline : value added to each stored entry
--   numlines  : selects the entry width (below 256: one byte, below 65536:
--               16 bits, otherwise 32 bits)
--   sizebc    : entries are read for pc = 1 .. sizebc - 1
--   sizedbg   : accepted but not used by this function
local function bcread_lineinfo(ls, target, firstline, numlines, sizebc, sizedbg)
    local read_entry
    if numlines < 256 then
        read_entry = bcread_byte
    elseif numlines < 65536 then
        read_entry = bcread_uint16
    else
        read_entry = bcread_uint32
    end
    for pc = 1, sizebc - 1 do
        action(target, "lineinfo", ls, pc, firstline + read_entry(ls))
    end
end
-- Build the target object used by the bcread_* routines during the first
-- pass over a prototype. Its handlers record constants, upvalues, debug
-- names, line numbers and jump targets into target.proto, so that the
-- printing pass can look them up afterwards.
--   target : printer object; target.proto receives the data and
--            target.childs supplies already-read child prototypes
-- Returns a table of handler functions keyed by action name.
local function proto_info_target(target)
    local proto = target.proto
    local function knum(_, ls, i, tag, value)
        proto.knum[i] = value
    end
    local function kgc(_, ls, i, value)
        -- bcread_kgc reports a child prototype as the plain number 0. The
        -- type check matters because a 64-bit integer cdata constant equal
        -- to zero also compares equal to 0 and must be stored as is.
        if type(value) == "number" and value == 0 then
            value = table.remove(target.childs)
        end
        proto.kgc[i] = value
    end
    local function uv(_, ls, i, value)
        proto.uv[i] = value
    end
    local function lineinfo(_, ls, pc, line)
        proto.lineinfo[pc] = line
    end
    local function uvinfo(_, ls, i, name)
        proto.uvinfo[i] = name
    end
    local function varinfo(_, ls, name, startpc, endpc)
        proto.varinfo[#proto.varinfo + 1] = {name, startpc, endpc}
    end
    local function enter_bytecode()
        -- Set of instruction indexes that are the destination of a jump.
        proto.target = {}
    end
    local function ins(_, ls, pc, ins, m)
        -- C mode 13 marks a jump operand; D holds the offset biased by 0x7fff.
        if band(m, 15*128) == 13*128 then proto.target[pc+shr(ins, 16)-0x7fff] = true end
    end
    return {
        knum = knum, kgc = kgc, uv = uv,
        lineinfo = lineinfo, uvinfo = uvinfo, varinfo = varinfo,
        enter_bytecode = enter_bytecode, ins = ins,
    }
end
bcread_uleb128(ls) 660 | local startn = ls.n 661 | action(target, "proto_len", ls, len) 662 | if len == 0 then return nil end 663 | bcread_need(ls, len) 664 | 665 | -- Read prototype header. 666 | local flags = bcread_byte(ls) 667 | action(target, "proto_flags", ls, flags) 668 | local numparams = bcread_byte(ls) 669 | action(target, "proto_numparams", ls, numparams) 670 | local framesize = bcread_byte(ls) 671 | action(target, "proto_framesize", ls, framesize) 672 | local sizeuv = bcread_byte(ls) 673 | local sizekgc = bcread_uleb128(ls) 674 | local sizekn = bcread_uleb128(ls) 675 | local sizebc = bcread_uleb128(ls) + 1 676 | action(target, "proto_sizes", ls, sizeuv, sizekgc, sizekn, sizebc) 677 | 678 | local sizedbg, firstline, numlines = 0, 0, 0 679 | if band(ls.flags, BCDUMP.F_STRIP) == 0 then 680 | sizedbg = bcread_uleb128(ls) 681 | action(target, "proto_debug_size", ls, sizedbg) 682 | if sizedbg > 0 then 683 | firstline = bcread_uleb128(ls) 684 | numlines = bcread_uleb128(ls) 685 | proto.firstline, proto.numlines = firstline, numlines 686 | action(target, "proto_lines", ls, firstline, numlines) 687 | end 688 | end 689 | 690 | local info = proto_info_target(target) 691 | if info then 692 | local save = save_position(ls) 693 | bcread_bytecode(ls, info, sizebc) 694 | bcread_uv(ls, info, sizeuv) 695 | bcread_kgc(ls, info, sizekgc) 696 | bcread_knum(ls, info, sizekn) 697 | if sizedbg > 0 then 698 | bcread_dbg(ls, info, firstline, numlines, sizebc, sizeuv, sizedbg) 699 | end 700 | restore_position(ls, save) 701 | end 702 | 703 | bcread_bytecode(ls, target, sizebc) 704 | bcread_uv(ls, target, sizeuv) 705 | bcread_kgc(ls, target, sizekgc) 706 | bcread_knum(ls, target, sizekn) 707 | if sizedbg > 0 then 708 | bcread_dbg(ls, target, firstline, numlines, sizebc, sizeuv, sizedbg) 709 | end 710 | 711 | assert(len == startn - ls.n, "prototype bytecode size mismatch") 712 | return target.proto 713 | end 714 | 715 | local function bcread_header(ls, target) 716 | if 
-- Print the chunk name next to the bytes it was decoded from.
--   ls        : reader state whose pending bytes are dumped by log()
--   chunkname : name read from the dump header
-- The name is passed as a format argument rather than being formatted first:
-- log() runs its own format(fmt, ...), so a "%" inside the chunk name would
-- otherwise be interpreted as a conversion specifier.
function Printer:set_chunkname(ls, chunkname)
    log(self.out, ls, "Chunkname: %s", chunkname)
end
-- Print one GC constant next to the bytes it was decoded from.
--   ls    : reader state whose pending bytes are dumped by log()
--   i     : 1-based index of the constant within the prototype
--   value : a string constant, the plain number 0 for a child prototype, or
--           any other value, which is printed with tostring()
function Printer:kgc(ls, i, value)
    local str
    if type(value) == "string" then
        str = format("%q", value)
    elseif type(value) == "number" and value == 0 then
        -- Child prototype: the first pass stored the resolved prototype in
        -- self.proto.kgc[i]. The type check keeps a 64-bit integer cdata
        -- constant equal to zero out of this branch.
        local pt = self.proto.kgc[i]
        str = format("[func %s:%d]", pt.filename, pt.firstline)
    else
        str = tostring(value)
    end
    log(self.out, ls, "kgc: %s", str)
end
-- Entry point: decode a bytecode dump and write a listing to `output`.
--   s         : the dump as a string
--   output    : object with a write method that receives the listing
--   chunkname : name handed to the printer object
--   hexdump   : truthy selects the Printer class (hex dump side by side with
--               the decoded fields), otherwise BCList is used
-- Returns the string "invalid header beginning char" when the first byte is
-- not BCDUMP.HEAD1, and nothing on success. Raises "spurious bytecode" when
-- bytes remain after the last prototype.
local function bcread(s, output, chunkname, hexdump)
    local state = { data = s, n = #s, p = 1, bytes = {} }

    local class
    if hexdump then
        class = Printer
    else
        class = BCList
    end
    local sink = printer_new(output, class, chunkname)

    local first = bcread_byte(state)
    if first ~= BCDUMP.HEAD1 then
        return "invalid header beginning char"
    end
    bcread_header(state, sink)

    -- Prototypes follow each other until bcread_proto reports the end.
    while true do
        local proto = bcread_proto(state, sink)
        if not proto then break end
        local childs = sink.childs
        childs[#childs + 1] = proto
    end

    if state.n > 0 then
        error("spurious bytecode")
    end
end
| -------------------------------------------------------------------------------- /bcsave.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- LuaJIT Language Toolkit. 3 | -- 4 | -- Copyright (C) 2013-2014 Francesco Abbate. All rights reserved. 5 | -- 6 | -- See Copyright Notice in LICENSE 7 | -- 8 | ---------------------------------------------------------------------------- 9 | -- 10 | -- Major portions taken verbatim or adapted from 11 | -- LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/ 12 | -- Copyright (C) 2005-2014 Mike Pall. All rights reserved. 13 | -- 14 | -- LuaJIT module to save/list bytecode. 15 | -- 16 | -- Copyright (C) 2005-2014 Mike Pall. All rights reserved. 17 | -- Released under the MIT license. See Copyright Notice in luajit.h 18 | ---------------------------------------------------------------------------- 19 | -- 20 | -- This module saves or lists the bytecode for an input file. 21 | -- It's run by the -b command line option. 22 | -- 23 | ------------------------------------------------------------------------------ 24 | 25 | local jit = require("jit") 26 | local bit = require("bit") 27 | 28 | -- Symbol name prefix for LuaJIT bytecode. 29 | local LJBC_PREFIX = "luaJIT_BC_" 30 | 31 | ------------------------------------------------------------------------------ 32 | 33 | local function usage() 34 | io.stderr:write[[ 35 | Save LuaJIT bytecode using the language toolkit. 36 | Usage: luajit-x -b[options] input output 37 | -l Only list bytecode. 38 | -x Only list bytecode with extended informations. 39 | -s Strip debug info (default). 40 | -g Keep debug info. 41 | -n name Set module name (default: auto-detect from input name). 42 | -t type Set output file type (default: auto-detect from output name). 43 | -a arch Override architecture for object files (default: native). 44 | -o os Override OS for object files (default: native). 45 | -e chunk Use chunk string as input. 
-- Derive a module name from an input file name.
-- The directory part and the last extension are dropped, the leading run of
-- word characters, "_", "." and "-" is kept, and every "." or "-" in it is
-- replaced by "_". A non-string argument, or a name with no usable leading
-- characters, is reported through check().
-- Returns the results of string.gsub (the name, then the replacement count).
local function detectmodname(str)
  local modname
  if type(str) == "string" then
    local base = str:match("[^/\\]+$") or str
    base = base:match("^(.*)%.[^.]*$") or base
    modname = base:match("^[%w_.%-]+")
  end
  check(modname, "cannot derive module name, use -n name")
  return string.gsub(modname, "[%.%-]", "_")
end
-- Write the bytecode as C source: a byte array named LJBC_PREFIX..modname.
--   ctx    : options; ctx.type == "c" selects an exported definition, any
--            other value a static array preceded by a _SIZE define;
--            ctx.modname supplies the symbol suffix
--   output : file name handed to savefile() and bcsave_tail()
--   s      : bytecode string, emitted as comma-separated decimal bytes
local function bcsave_c(ctx, output, s)
  local fp = savefile(output, "w")
  if ctx.type == "c" then
    -- The guard must test __cplusplus (two underscores), the macro C++
    -- compilers actually define; otherwise extern "C" is never emitted.
    fp:write(string.format([[
#ifdef __cplusplus
extern "C"
#endif
#ifdef _WIN32
__declspec(dllexport)
#endif
const char %s%s[] = {
]], LJBC_PREFIX, ctx.modname))
  else
    fp:write(string.format([[
#define %s%s_SIZE %d
static const char %s%s[] = {
]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname))
  end
  -- t[1..n] holds the numbers of the current output row, m its width so far
  -- (each number counts its digits plus one separator).
  local t, n, m = {}, 0, 0
  for i=1,#s do
    local b = tostring(string.byte(s, i))
    m = m + #b + 1
    if m > 78 then
      -- Row is full: flush it and start a new one with this number.
      fp:write(table.concat(t, ",", 1, n), ",\n")
      n, m = 0, #b + 1
    end
    n = n + 1
    t[n] = b
  end
  bcsave_tail(fp, output, table.concat(t, ",", 1, n).."\n};\n")
end
typedef struct { 179 | uint32_t name, type; 180 | uint64_t flags, addr, ofs, size; 181 | uint32_t link, info; 182 | uint64_t align, entsize; 183 | } ELF64sectheader; 184 | typedef struct { 185 | uint32_t name, value, size; 186 | uint8_t info, other; 187 | uint16_t sectidx; 188 | } ELF32symbol; 189 | typedef struct { 190 | uint32_t name; 191 | uint8_t info, other; 192 | uint16_t sectidx; 193 | uint64_t value, size; 194 | } ELF64symbol; 195 | typedef struct { 196 | ELF32header hdr; 197 | ELF32sectheader sect[6]; 198 | ELF32symbol sym[2]; 199 | uint8_t space[4096]; 200 | } ELF32obj; 201 | typedef struct { 202 | ELF64header hdr; 203 | ELF64sectheader sect[6]; 204 | ELF64symbol sym[2]; 205 | uint8_t space[4096]; 206 | } ELF64obj; 207 | ]] 208 | local symname = LJBC_PREFIX..ctx.modname 209 | local is64, isbe = false, false 210 | if ctx.arch == "x64" then 211 | is64 = true 212 | elseif ctx.arch == "ppc" or ctx.arch == "ppcspe" or ctx.arch == "mips" then 213 | isbe = true 214 | end 215 | 216 | -- Handle different host/target endianess. 217 | local function f32(x) return x end 218 | local f16, fofs = f32, f32 219 | if ffi.abi("be") ~= isbe then 220 | f32 = bit.bswap 221 | function f16(x) return bit.rshift(bit.bswap(x), 16) end 222 | if is64 then 223 | local two32 = ffi.cast("int64_t", 2^32) 224 | function fofs(x) return bit.bswap(x)*two32 end 225 | else 226 | fofs = f32 227 | end 228 | end 229 | 230 | -- Create ELF object and fill in header. 231 | local o = ffi.new(is64 and "ELF64obj" or "ELF32obj") 232 | local hdr = o.hdr 233 | if ctx.os == "bsd" or ctx.os == "other" then -- Determine native hdr.eosabi. 
234 | local bf = assert(io.open("/bin/ls", "rb")) 235 | local bs = bf:read(9) 236 | bf:close() 237 | ffi.copy(o, bs, 9) 238 | check(hdr.emagic[0] == 127, "no support for writing native object files") 239 | else 240 | hdr.emagic = "\127ELF" 241 | hdr.eosabi = ({ freebsd=9, netbsd=2, openbsd=12, solaris=6 })[ctx.os] or 0 242 | end 243 | hdr.eclass = is64 and 2 or 1 244 | hdr.eendian = isbe and 2 or 1 245 | hdr.eversion = 1 246 | hdr.type = f16(1) 247 | hdr.machine = f16(({ x86=3, x64=62, arm=40, ppc=20, ppcspe=20, mips=8, mipsel=8 })[ctx.arch]) 248 | if ctx.arch == "mips" or ctx.arch == "mipsel" then 249 | hdr.flags = 0x50001006 250 | end 251 | hdr.version = f32(1) 252 | hdr.shofs = fofs(ffi.offsetof(o, "sect")) 253 | hdr.ehsize = f16(ffi.sizeof(hdr)) 254 | hdr.shentsize = f16(ffi.sizeof(o.sect[0])) 255 | hdr.shnum = f16(6) 256 | hdr.shstridx = f16(2) 257 | 258 | -- Fill in sections and symbols. 259 | local sofs, ofs = ffi.offsetof(o, "space"), 1 260 | for i,name in ipairs{ 261 | ".symtab", ".shstrtab", ".strtab", ".rodata", ".note.GNU-stack", 262 | } do 263 | local sect = o.sect[i] 264 | sect.align = fofs(1) 265 | sect.name = f32(ofs) 266 | ffi.copy(o.space+ofs, name) 267 | ofs = ofs + #name+1 268 | end 269 | o.sect[1].type = f32(2) -- .symtab 270 | o.sect[1].link = f32(3) 271 | o.sect[1].info = f32(1) 272 | o.sect[1].align = fofs(8) 273 | o.sect[1].ofs = fofs(ffi.offsetof(o, "sym")) 274 | o.sect[1].entsize = fofs(ffi.sizeof(o.sym[0])) 275 | o.sect[1].size = fofs(ffi.sizeof(o.sym)) 276 | o.sym[1].name = f32(1) 277 | o.sym[1].sectidx = f16(4) 278 | o.sym[1].size = fofs(#s) 279 | o.sym[1].info = 17 280 | o.sect[2].type = f32(3) -- .shstrtab 281 | o.sect[2].ofs = fofs(sofs) 282 | o.sect[2].size = fofs(ofs) 283 | o.sect[3].type = f32(3) -- .strtab 284 | o.sect[3].ofs = fofs(sofs + ofs) 285 | o.sect[3].size = fofs(#symname+1) 286 | ffi.copy(o.space+ofs+1, symname) 287 | ofs = ofs + #symname + 2 288 | o.sect[4].type = f32(1) -- .rodata 289 | o.sect[4].flags = fofs(2) 290 
-- Write the bytecode as a PE object file.
--   ctx    : options; ctx.modname supplies the symbol suffix and ctx.arch
--            selects the machine code stored in the header
--   output : file name handed to savefile() and bcsave_tail()
--   s      : bytecode string; its length is stored as the size of the
--            ".rdata" section and the string itself is appended to the file
--   ffi    : the FFI library, used to declare and fill the object layout
local function bcsave_peobj(ctx, output, s, ffi)
  ffi.cdef[[
typedef struct {
  uint16_t arch, nsects;
  uint32_t time, symtabofs, nsyms;
  uint16_t opthdrsz, flags;
} PEheader;
typedef struct {
  char name[8];
  uint32_t vsize, vaddr, size, ofs, relocofs, lineofs;
  uint16_t nreloc, nline;
  uint32_t flags;
} PEsection;
typedef struct __attribute((packed)) {
  union {
    char name[8];
    uint32_t nameref[2];
  };
  uint32_t value;
  int16_t sect;
  uint16_t type;
  uint8_t scl, naux;
} PEsym;
typedef struct __attribute((packed)) {
  uint32_t size;
  uint16_t nreloc, nline;
  uint32_t cksum;
  uint16_t assoc;
  uint8_t comdatsel, unused[3];
} PEsymaux;
typedef struct {
  PEheader hdr;
  PEsection sect[2];
  // Must be an even number of symbol structs.
  PEsym sym0;
  PEsymaux sym0aux;
  PEsym sym1;
  PEsymaux sym1aux;
  PEsym sym2;
  PEsym sym3;
  uint32_t strtabsize;
  uint8_t space[4096];
} PEobj;
]]
  local symname = LJBC_PREFIX..ctx.modname
  -- NOTE(review): is64 is assigned below but not read again in this function.
  local is64 = false
  if ctx.arch == "x86" then
    -- On x86 the symbol name gets a leading underscore.
    symname = "_"..symname
  elseif ctx.arch == "x64" then
    is64 = true
  end
  -- Contents of the ".drectve" section: an /EXPORT directive for the symbol.
  local symexport = " /EXPORT:"..symname..",DATA "

  -- The file format is always little-endian. Swap if the host is big-endian.
  local function f32(x) return x end
  local f16 = f32
  if ffi.abi("be") then
    f32 = bit.bswap
    -- 16-bit swap: swap all 32 bits, then shift the result back down.
    function f16(x) return bit.rshift(bit.bswap(x), 16) end
  end

  -- Create PE object and fill in header.
  local o = ffi.new("PEobj")
  local hdr = o.hdr
  hdr.arch = f16(({ x86=0x14c, x64=0x8664, arm=0x1c0, ppc=0x1f2, mips=0x366, mipsel=0x366 })[ctx.arch])
  hdr.nsects = f16(2)
  hdr.symtabofs = f32(ffi.offsetof(o, "sym0"))
  -- Six symbol records: sym0, sym0aux, sym1, sym1aux, sym2, sym3.
  hdr.nsyms = f32(6)

  -- Fill in sections and symbols.
  o.sect[0].name = ".drectve"
  o.sect[0].size = f32(#symexport)
  o.sect[0].flags = f32(0x00100a00)
  o.sym0.sect = f16(1)
  o.sym0.scl = 3
  o.sym0.name = ".drectve"
  o.sym0.naux = 1
  o.sym0aux.size = f32(#symexport)
  o.sect[1].name = ".rdata"
  o.sect[1].size = f32(#s)
  o.sect[1].flags = f32(0x40300040)
  o.sym1.sect = f16(2)
  o.sym1.scl = 3
  o.sym1.name = ".rdata"
  o.sym1.naux = 1
  o.sym1aux.size = f32(#s)
  -- sym2 refers to section 2 (".rdata") and takes its name by reference.
  -- Presumably 4 is the string-table offset of symname, which is copied
  -- right after the 4-byte strtabsize field below. TODO confirm against the
  -- COFF specification.
  o.sym2.sect = f16(2)
  o.sym2.scl = 2
  o.sym2.nameref[1] = f32(4)
  o.sym3.sect = f16(-1)
  o.sym3.scl = 2
  o.sym3.value = f32(1)
  o.sym3.name = "@feat.00" -- Mark as SafeSEH compliant.
  -- o.space receives symname (with its terminating NUL) followed by the
  -- directive text; ofs counts the bytes of o.space used so far.
  ffi.copy(o.space, symname)
  local ofs = #symname + 1
  o.strtabsize = f32(ofs + 4)
  o.sect[0].ofs = f32(ffi.offsetof(o, "space") + ofs)
  ffi.copy(o.space + ofs, symexport)
  ofs = ofs + #symexport
  -- The ".rdata" contents start where the used part of o.space ends.
  o.sect[1].ofs = f32(ffi.offsetof(o, "space") + ofs)

  -- Write PE object file.
  local fp = savefile(output, "wb")
  -- Emit the struct up to the used part of o.space; bcsave_tail then appends
  -- the bytecode itself.
  fp:write(ffi.string(o, ffi.sizeof(o)-4096+ofs))
  bcsave_tail(fp, output, s)
end
arch[1]; 472 | mach_nlist sym_entry; 473 | uint8_t space[4096]; 474 | } mach_obj; 475 | typedef struct { 476 | struct { 477 | mach_header_64 hdr; 478 | mach_segment_command_64 seg; 479 | mach_section_64 sec; 480 | mach_symtab_command sym; 481 | } arch[1]; 482 | mach_nlist_64 sym_entry; 483 | uint8_t space[4096]; 484 | } mach_obj_64; 485 | typedef struct { 486 | mach_fat_header fat; 487 | mach_fat_arch fat_arch[4]; 488 | struct { 489 | mach_header hdr; 490 | mach_segment_command seg; 491 | mach_section sec; 492 | mach_symtab_command sym; 493 | } arch[4]; 494 | mach_nlist sym_entry; 495 | uint8_t space[4096]; 496 | } mach_fat_obj; 497 | ]] 498 | local symname = '_'..LJBC_PREFIX..ctx.modname 499 | local isfat, is64, align, mobj = false, false, 4, "mach_obj" 500 | if ctx.arch == "x64" then 501 | is64, align, mobj = true, 8, "mach_obj_64" 502 | elseif ctx.arch == "arm" then 503 | isfat, mobj = true, "mach_fat_obj" 504 | else 505 | check(ctx.arch == "x86", "unsupported architecture for OSX") 506 | end 507 | local function aligned(v, a) return bit.band(v+a-1, -a) end 508 | local be32 = bit.bswap -- Mach-O FAT is BE, supported archs are LE. 509 | 510 | -- Create Mach-O object and fill in header. 511 | local o = ffi.new(mobj) 512 | local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align) 513 | local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12,12,12} })[ctx.arch] 514 | local cpusubtype = ({ x86={3}, x64={3}, arm={3,6,9,11} })[ctx.arch] 515 | if isfat then 516 | o.fat.magic = be32(0xcafebabe) 517 | o.fat.nfat_arch = be32(#cpusubtype) 518 | end 519 | 520 | -- Fill in sections and symbols. 521 | for i=0,#cpusubtype-1 do 522 | local ofs = 0 523 | if isfat then 524 | local a = o.fat_arch[i] 525 | a.cputype = be32(cputype[i+1]) 526 | a.cpusubtype = be32(cpusubtype[i+1]) 527 | -- Subsequent slices overlap each other to share data. 
528 | ofs = ffi.offsetof(o, "arch") + i*ffi.sizeof(o.arch[0]) 529 | a.offset = be32(ofs) 530 | a.size = be32(mach_size-ofs+#s) 531 | end 532 | local a = o.arch[i] 533 | a.hdr.magic = is64 and 0xfeedfacf or 0xfeedface 534 | a.hdr.cputype = cputype[i+1] 535 | a.hdr.cpusubtype = cpusubtype[i+1] 536 | a.hdr.filetype = 1 537 | a.hdr.ncmds = 2 538 | a.hdr.sizeofcmds = ffi.sizeof(a.seg)+ffi.sizeof(a.sec)+ffi.sizeof(a.sym) 539 | a.seg.cmd = is64 and 0x19 or 0x1 540 | a.seg.cmdsize = ffi.sizeof(a.seg)+ffi.sizeof(a.sec) 541 | a.seg.vmsize = #s 542 | a.seg.fileoff = mach_size-ofs 543 | a.seg.filesize = #s 544 | a.seg.maxprot = 1 545 | a.seg.initprot = 1 546 | a.seg.nsects = 1 547 | ffi.copy(a.sec.sectname, "__data") 548 | ffi.copy(a.sec.segname, "__DATA") 549 | a.sec.size = #s 550 | a.sec.offset = mach_size-ofs 551 | a.sym.cmd = 2 552 | a.sym.cmdsize = ffi.sizeof(a.sym) 553 | a.sym.symoff = ffi.offsetof(o, "sym_entry")-ofs 554 | a.sym.nsyms = 1 555 | a.sym.stroff = ffi.offsetof(o, "sym_entry")+ffi.sizeof(o.sym_entry)-ofs 556 | a.sym.strsize = aligned(#symname+2, align) 557 | end 558 | o.sym_entry.type = 0xf 559 | o.sym_entry.sect = 1 560 | o.sym_entry.strx = 1 561 | ffi.copy(o.space+1, symname) 562 | 563 | -- Write Macho-O object file. 
564 | local fp = savefile(output, "wb") 565 | fp:write(ffi.string(o, mach_size)) 566 | bcsave_tail(fp, output, s) 567 | end 568 | 569 | local function bcsave_obj(ctx, output, s) 570 | local ok, ffi = pcall(require, "ffi") 571 | check(ok, "FFI library required to write this file type") 572 | if ctx.os == "windows" then 573 | return bcsave_peobj(ctx, output, s, ffi) 574 | elseif ctx.os == "osx" then 575 | return bcsave_machobj(ctx, output, s, ffi) 576 | else 577 | return bcsave_elfobj(ctx, output, s, ffi) 578 | end 579 | end 580 | 581 | ------------------------------------------------------------------------------ 582 | 583 | local function bc_magic_header(input) 584 | local f, err = io.open(input, "rb") 585 | check(f, "cannot open ", err) 586 | local header = f:read(4) 587 | local match = (header == string.char(0x1b, 0x4c, 0x4a, 0x01)) 588 | f:close() 589 | return match 590 | end 591 | 592 | 593 | local function bccompile(ctx, input) 594 | local compile = require("sci-lang.compile") 595 | local ok, bcstring 596 | if ctx.string_input then 597 | ok, bcstring = compile.string(input) 598 | check(ok, "cannot compile string:", input) 599 | else 600 | if input == "-" then 601 | ok, bcstring = compile.file() 602 | else 603 | if bc_magic_header(input) then 604 | local f = io.open(input, "rb") 605 | check(f, "cannot open file") 606 | ok, bcstring = true, f:read("*a") 607 | f:close() 608 | else 609 | ok, bcstring = compile.file(input) 610 | end 611 | end 612 | check(ok, "cannot compile file:", input) 613 | end 614 | return bcstring 615 | end 616 | 617 | local function bclist(ctx, input, output) 618 | local s = bccompile(ctx, input) 619 | require("sci-lang.bcread").dump(s, savefile(output, "w"), input, ctx.hexdump) 620 | end 621 | 622 | local function bcsave(ctx, input, output) 623 | -- TODO: implement the ctx.strip option 624 | local s = bccompile(ctx, input) 625 | local t = ctx.type 626 | if not t then 627 | t = detecttype(output) 628 | ctx.type = t 629 | end 630 | if t == 
"raw" then 631 | bcsave_raw(output, s) 632 | else 633 | if not ctx.modname then ctx.modname = detectmodname(input) end 634 | if t == "obj" then 635 | bcsave_obj(ctx, output, s) 636 | else 637 | bcsave_c(ctx, output, s) 638 | end 639 | end 640 | end 641 | 642 | -- Process -b command line option. 643 | local function docmd(...) 644 | local arg = {...} 645 | local n = 1 646 | local list = false 647 | local ctx = { 648 | strip = true, arch = jit.arch, os = string.lower(jit.os), 649 | type = false, modname = false, hexdump = false, string_input = false, 650 | } 651 | while n <= #arg do 652 | local a = arg[n] 653 | if type(a) == "string" and string.sub(a, 1, 1) == "-" and a ~= "-" then 654 | table.remove(arg, n) 655 | if a == "--" then break end 656 | for m=2,#a do 657 | local opt = string.sub(a, m, m) 658 | if opt == "l" then 659 | list = true 660 | elseif opt == "s" then 661 | ctx.strip = true 662 | elseif opt == "g" then 663 | ctx.strip = false 664 | elseif opt == "x" then 665 | list = true 666 | ctx.hexdump = true 667 | else 668 | if arg[n] == nil or m ~= #a then usage() end 669 | if opt == "e" then 670 | if n ~= 1 then usage() end 671 | ctx.string_input = true 672 | elseif opt == "n" then 673 | ctx.modname = checkmodname(table.remove(arg, n)) 674 | elseif opt == "t" then 675 | ctx.type = checkarg(table.remove(arg, n), map_type, "file type") 676 | elseif opt == "a" then 677 | ctx.arch = checkarg(table.remove(arg, n), map_arch, "architecture") 678 | elseif opt == "o" then 679 | ctx.os = checkarg(table.remove(arg, n), map_os, "OS name") 680 | else 681 | usage() 682 | end 683 | end 684 | end 685 | else 686 | n = n + 1 687 | end 688 | end 689 | if list then 690 | if #arg == 0 or #arg > 2 then usage() end 691 | bclist(ctx, arg[1], arg[2] or "-") 692 | else 693 | if #arg ~= 2 then usage() end 694 | bcsave(ctx, arg[1], arg[2]) 695 | end 696 | end 697 | 698 | return { start = docmd } 699 | -------------------------------------------------------------------------------- 
/compile.lua:
--------------------------------------------------------------------------------
local lex_setup = require('sci-lang.lexer')
local parse = require('sci-lang.parser')
local ast = require('sci-lang.lua-ast').New()
local reader = require('sci-lang.reader')
local transform = require ('sci-lang.transform')

-- Two kinds of backend can be used to generate the code from the AST:
-- - "generator", generates LuaJIT bytecode
-- - "luacode-generator", generates Lua code
--
-- Both can be used interchangeably: they take the AST tree and produce
-- a string that can be passed to the function "loadstring".
-- In the case of the bytecode generator the string will actually be a
-- binary blob that corresponds to the generated bytecode.


-- Convert an error value caught by pcall into the module's return convention.
-- Messages tagged with the "LLT-ERROR" prefix are reported as
-- `false, "luajit-lang-toolkit: <rest of message>"`; anything else is
-- re-raised with error().
-- The type check matters: an error value that is not a string (for example
-- a table thrown with error{...}) must not reach string.sub, which would
-- raise an unrelated "bad argument" error and hide the original value.
local function lang_toolkit_error(msg)
    if type(msg) == "string" and string.sub(msg, 1, 9) == "LLT-ERROR" then
        return false, "luajit-lang-toolkit: " .. string.sub(msg, 10)
    else
        error(msg)
    end
end

-- Run lexer setup, parser, AST transform and code generator on one input.
--   reader   : reader object handed to the lexer setup (this parameter
--              shadows the module-level "reader" inside the function).
--   filename : chunk name passed to both the lexer setup and the generator.
--   options  : optional table; a truthy "code" field selects the Lua source
--              generator instead of the bytecode generator.
-- Returns `true, generated_string` on success, or `false, message` when the
-- parser or the generator raised an "LLT-ERROR" tagged error. Other errors
-- are re-raised. Note that transform.root is called outside of pcall, so any
-- error it raises propagates to the caller unchanged.
local function compile(reader, filename, options)
    local generator
    if options and options.code then
        generator = require('sci-lang.luacode-generator')
    else
        generator = require('sci-lang.generator')
    end
    local ls = lex_setup(reader, filename)
    local parse_success, tree = pcall(parse, ast, ls)
    if not parse_success then
        return lang_toolkit_error(tree)
    end
    local ttree = transform.root(tree)
    ttree = ttree or tree -- If nothing is returned, it's in-place transform.
    local success, luacode = pcall(generator, ttree, filename)
    if not success then
        return lang_toolkit_error(luacode)
    end
    return true, luacode
end

-- Compile source text held in a string. The chunk name defaults to "stdin".
local function lang_loadstring(src, filename, options)
    return compile(reader.string(src), filename or "stdin", options)
end

-- Compile the file called "filename". The name is handed to reader.file
-- as-is (possibly nil); only the chunk name falls back to "stdin".
local function lang_loadfile(filename, options)
    return compile(reader.file(filename), filename or "stdin", options)
end

return { string = lang_loadstring, file = lang_loadfile }
--------------------------------------------------------------------------------
/generator.lua:
--------------------------------------------------------------------------------
--
-- LuaJIT Language Toolkit.
--
-- Copyright (C) 2013-2014 Francesco Abbate. All rights reserved.
--
-- Based on the original work of Richard Hundt,
-- https://github.com/richardhundt/nyanga.
--
-- See Copyright Notice in LICENSE
--

local bc = require('sci-lang.bytecode')
local const_eval = require("sci-lang.ast-const-eval")
local boolean_const_eval = require("sci-lang.ast-boolean-const-eval")

-- Counter behind genid(); shared by the whole module.
local ID = 0

-- Return a fresh label name of the form "__<n>". Every call increments the
-- module-level counter, so names never repeat within one load of the module.
local function genid()
    ID = ID + 1
    return '__'..ID
end

local BC = bc.BC

-- Comparison operators with the corresponding instruction.
-- The boolean value indicates if the operands should be swapped.
local cmpop = {
    ['<']  = { 'LT', false },
    ['>']  = { 'LT', true  },  -- a > b is emitted as LT with swapped operands
    ['<='] = { 'LE', false },
    ['>='] = { 'LE', true  },  -- a >= b is emitted as LE with swapped operands
    ['=='] = { 'EQ', false },
    ['~='] = { 'NE', false },
}

-- Same layout as above, but holding the instruction of the inverse test.
local cmpopinv = {
    ['<']  = { 'GE', false },
    ['>']  = { 'GE', true  },
    ['<='] = { 'GT', false },
    ['>='] = { 'GT', true  },
    ['=='] = { 'NE', false },
    ['~='] = { 'EQ', false },
}

-- Raise a generator error tagged with the "LLT-ERROR" prefix followed by
-- "<chunkname>:<line>: <msg>". Level 0 keeps error() from prepending its
-- own position information to the message.
local function lang_error(msg, chunkname, line)
    local text = string.format("LLT-ERROR%s:%d: %s", chunkname, line, msg)
    error(text, 0)
end

-- Value passed as "want" when the caller accepts any number of results.
local MULTIRES = -1

-- The EXPR_RESULT_* values are bit flags; combine and test them with
-- bitfield operations.
local EXPR_RESULT_TRUE = 1
local EXPR_RESULT_FALSE = 2
local EXPR_RESULT_BOTH = 3

-- Operand kinds accepted by the infix arithmetic instructions.
local EXPR_EMIT_VN = { value = true, number = true }

-- Operand kinds accepted by the USETx, ISEQx and ISNEx instructions.
local EXPR_EMIT_VSNP = {
    value = true, string = true, number = true, primitive = true,
}

-- Operand kinds accepted by the TGETx/TSETx instructions.
local EXPR_EMIT_VSB = { value = true, string = true, byte = true }

-- Map a condition to its flag: EXPR_RESULT_TRUE when "cond" is truthy,
-- EXPR_RESULT_FALSE otherwise.
local function store_bit(cond)
    if cond then
        return EXPR_RESULT_TRUE
    end
    return EXPR_RESULT_FALSE
end

-- Logical XOR (exclusive OR).
-- With a truthy "a" the result is "not b"; otherwise it is "b" itself.
local function xor(a, b)
    if a then
        return not b
    end
    return b
end

-- Dispatch tables, indexed by AST node kind.
local StatementRule = { }
local ExpressionRule = { }
local MultiExprRule = { }
local LHSExpressionRule = { }
local TestRule = { }

-- True when the node is a 'Literal' node.
local function is_literal(node)
    return node.kind == 'Literal'
end

-- True when the node is an 'Identifier' node.
local function is_identifier(node)
    return node.kind == 'Identifier'
end

-- For an identifier that ctx:lookup resolves without the upvalue flag,
-- return the "idx" field of the variable info. Returns nothing otherwise.
local function is_local_var(ctx, node)
    if node.kind ~= 'Identifier' then
        return
    end
    local info, uval = ctx:lookup(node.name)
    if info and not uval then
        return info.idx
    end
end

-- True when the node kind has a MultiExprRule entry, i.e. the node may
-- produce several results.
local function is_vcall(node)
    return MultiExprRule[node.kind] ~= nil
end

-- Emit a register move unless source and destination already coincide.
local function mov_toreg(ctx, dest, src)
    if dest == src then
        return
    end
    ctx:op_move(dest, src)
end

-- Conditionally move "src" to "dest" and jump to the given target
-- if "src" evaluates to true/false according to "cond". When both
-- registers coincide only the test is emitted.
local function cond_mov_toreg(ctx, cond, dest, src, jump_label, jreg)
    if dest == src then
        ctx:op_test(cond, src, jump_label, jreg)
    else
        ctx:op_testmov(cond, dest, src, jump_label, jreg)
    end
end

-- True for integral numbers in the range [0, 255].
local function is_byte_number(v)
    if type(v) ~= 'number' then
        return false
    end
    return v % 1 == 0 and v >= 0 and v < 256
end

-- ExpressionRule's entries take a node and a destination register (dest)
-- used to store the result. At the end of the call no new registers are
-- marked as used.
-- ExpressionRule functions return nothing or a boolean value to indicate
-- whether the expression terminates with a tail call instruction.

-- Load the literal's value into "dest".
function ExpressionRule:Literal(node, dest)
    self.ctx:op_load(dest, node.value)
end

-- Read a variable into "dest": a global get when the lookup finds nothing,
-- an upvalue get when the lookup flags an upvalue, a register move otherwise.
function ExpressionRule:Identifier(node, dest)
    local ctx, name = self.ctx, node.name
    local var, uval = ctx:lookup(name)
    if not var then
        ctx:op_gget(dest, name)
    elseif uval then
        -- Ensure the variable is marked as upvalue in the proto and take
        -- the upvalue index.
        local uv = ctx:upval(name)
        ctx:op_uget(dest, uv)
    else
        mov_toreg(ctx, dest, var.idx)
    end
end

-- Emit a vararg fetch into "dest"; the literal 1 is passed where
-- MultiExprRule passes its "want" count (presumably one value is requested).
function ExpressionRule:Vararg(node, dest)
    self.ctx:op_varg(dest, 1)
end

-- MultiExprRule's entries take a node and a number of wanted results (want)
-- and an optional boolean argument "tail" that indicates to emit a tail call
-- if possible.
-- The argument "want" can also be MULTIRES to indicate that the caller wants
-- as many results as the instruction returns.
-- The code will store on the stack (starting from freereg) the number of
-- wanted results.
-- Return a first boolean value to indicate if many results are generated.
-- A second boolean value indicates if a tail call was actually done.

-- Store "want" vararg values starting at the first free register.
function MultiExprRule:Vararg(node, want)
    self.ctx:op_varg(self.ctx.freereg, want)
    return true, false -- Multiple results, no tail call.
end

-- Tell whether "node" is a compile-time constant.
-- Returns `true, value` when const_eval yields a truthy value, or when the
-- node is a string, boolean or nil literal; returns false otherwise (a
-- literal of another type yields `false, value`).
local function expr_isk(self, node)
    local const = const_eval(node)
    if const then
        return true, const
    elseif node.kind == "Literal" then
        local t = type(node.value)
        return (t == "string" or t == "boolean" or t == "nil"), node.value
    else
        return false
    end
end

-- Allocate a table template in the current proto and rewrite the already
-- emitted instruction "ins" into a TDUP of that template into "dest".
-- Returns the template table so the caller can fill it in.
local function emit_tdup(self, dest, ins)
    local kidx, t = self.ctx:new_table_template()
    ins:rewrite(BC.TDUP, dest, kidx)
    return t
end

-- True for non-negative integral numbers below 2^31.
local function is_kint(x)
    return x % 1 == 0 and x >= 0 and x < 2^31
end

-- Emit a table constructor into "dest".
-- An empty constructor is a plain TNEW. Otherwise a TNEW is emitted into a
-- fresh register and its instruction handle is kept so it can be rewritten:
--   * entries whose key and value are both constant are collected in a
--     template table, created on demand by turning the TNEW into a TDUP;
--   * the remaining entries are stored with individual table-set ops;
--   * a trailing multi-result expression is stored with op_tsetm.
-- If no template was needed the TNEW is rewritten with a size hint.
function ExpressionRule:Table(node, dest)
    if #node.keyvals == 0 then
        self.ctx:op_tnew(dest, 0, 0)
        return
    end

    local free = self.ctx.freereg
    local ins = self.ctx:op_tnew(free, 0, 0)
    self.ctx:nextreg()
    local t
    local vtop = self.ctx.freereg
    -- narray/nhash describe the template; na/nh count positional and keyed
    -- entries for the TNEW size hint.
    local narray, nhash = 0, 0
    local na, nh = 0, 0
    local zeroarr = 0
    for k = 1, #node.keyvals do
        local kv = node.keyvals[k]
        local value, key = kv[1], kv[2]
        if key then
            local k_is_const, kval = expr_isk(self, key)
            local v_is_const, vval = expr_isk(self, value)
            if k_is_const and kval ~= nil and v_is_const then
                if type(kval) == "number" and is_kint(kval) then
                    if not t then t = emit_tdup(self, free, ins) end
                    t.array[kval] = vval
                    narray = math.max(narray, kval + 1)
                    if kval == 0 then -- Zero-indexed array term.
                        zeroarr = 1
                    end
                else
                    nhash = nhash + 1
                    if not t then t = emit_tdup(self, free, ins) end
                    -- NB: Adopt the "keyvals" style instead of hash_keys/values.
                    t.hash_keys[nhash] = kval
                    t.hash_values[nhash] = vval
                end
            else
                local ktag, kval = self:expr_toanyreg_tagged(key, EXPR_EMIT_VSB)
                local v = self:expr_toanyreg(value)
                self.ctx:op_tset(free, ktag, kval, v)
                self.ctx.freereg = vtop
            end
            nh = nh + 1
        else
            na = na + 1
            local is_const, expr_val = expr_isk(self, value)
            if is_const then
                if not t then t = emit_tdup(self, free, ins) end
                t.array[na] = expr_val
                -- Never shrink the template array size: an earlier constant
                -- integer key (as in {[10] = 1, 2}) may already have raised
                -- it above na + 1.
                -- NOTE(review): assumes the template writer relies on
                -- "narray" for the array part; confirm against bytecode.lua.
                narray = math.max(narray, na + 1)
            elseif is_vcall(value) and k == #node.keyvals then
                self:expr_tomultireg(value, MULTIRES)
                self.ctx:op_tsetm(free, na)
            else
                local ktag, kval
                if na < 256 then
                    ktag, kval = 'B', na
                else
                    ktag, kval = 'V', self.ctx:nextreg()
                    self.ctx:op_load(kval, na)
                end
                local v = self:expr_toanyreg(value)
                self.ctx:op_tset(free, ktag, kval, v)
                self.ctx.freereg = vtop
            end
        end
    end

    if t then
        t.narray, t.nhash = narray, nhash
    else
        -- zeroarr is only ever set right after a template has been created,
        -- so it is always 0 when this branch runs.
        na = na + zeroarr
        nh = nh - zeroarr
        local sz = ins.tnewsize(na > 0 and na or nil, nh)
        ins:rewrite(BC.TNEW, free, sz)
    end

    mov_toreg(self.ctx, dest, free)

    self.ctx.freereg = free
end

-- Operations that admit instructions in the form ADDVV, ADDVN, ADDNV
local dirop = {
    ['+'] = 'ADD',
    ['*'] = 'MUL',
    ['-'] = 'SUB',
    ['/'] = 'DIV',
    ['%'] = 'MOD',
}

-- Evaluate every term into consecutive registers starting at the first free
-- one, then concatenate that register range into "dest".
function ExpressionRule:ConcatenateExpression(node, dest)
    local free = self.ctx.freereg
    for i = 1, #node.terms do
        self:expr_tonextreg(node.terms[i])
    end
    self.ctx.freereg = free
    self.ctx:op_cat(dest, free, free + #node.terms - 1)
end

-- Emit a binary expression into "dest".
-- Comparison operators go through test_emit storing both outcomes, the
-- operators listed in "dirop" use the infix instructions, and '^' uses
-- op_pow. Any other operator raises an error.
function ExpressionRule:BinaryExpression(node, dest, jreg)
    local free = self.ctx.freereg
    local o = node.operator
    if cmpop[o] then
        local l = genid()
        self:test_emit(node, l, jreg, false, EXPR_RESULT_BOTH, dest)
        self.ctx:here(l)
    elseif dirop[o] then
        local atag, a = self:expr_toanyreg_tagged(node.left, EXPR_EMIT_VN)
        local btag, b = self:expr_toanyreg_tagged(node.right, EXPR_EMIT_VN)
        if atag == "N" and btag == "N" then
            -- handle "nan" values here the same way LuaJIT does
            -- usually, both operands will always be 0 when both constant but
            -- re-check just to make sure, in order to trigger the assert when
            -- there's a bug in the generator
            local aval = const_eval(node.left)
            local bval = const_eval(node.right)
            if aval == 0 and bval == 0 then
                -- Turn the left operand into a register holding 0.
                atag, a = "V", self.ctx.freereg
                self.ctx:op_load(self.ctx:nextreg(), 0)
            else
                assert(false, "operands are both constants")
            end
        end
        self.ctx.freereg = free
        self.ctx:op_infix(dirop[o], dest, atag, a, btag, b)
    else
        local a = self:expr_toanyreg(node.left)
        local b = self:expr_toanyreg(node.right)
        self.ctx.freereg = free
        if o == '^' then
            self.ctx:op_pow(dest, a, b)
        else
            error("bad binary operator: "..o, 2)
        end
    end
end

-- Emit the wrapped expression (node.value) into "dest".
function ExpressionRule:ExpressionValue(node, dest, jreg)
    self:expr_toreg(node.value, dest, jreg)
end

-- Emit unary minus, length or logical not of the argument into "dest".
-- Any other operator raises an error.
function ExpressionRule:UnaryExpression(node, dest)
    local free = self.ctx.freereg
    local a = self:expr_toanyreg(node.argument)
    self.ctx.freereg = free
    local o = node.operator
    if o == '-' then
        self.ctx:op_unm(dest, a)
    elseif o == '#' then
        self.ctx:op_len(dest, a)
    elseif o == 'not' then
        self.ctx:op_not(dest, a)
    else
        error("bad unary operator: "..o, 2)
    end
end

-- Emit a short-circuit logical expression into "dest": the left operand is
-- tested (negated for "or") with a jump to the end label, then the right
-- operand is evaluated into "dest".
function ExpressionRule:LogicalExpression(node, dest, jreg)
    local negate = (node.operator == 'or')
    local lstore = store_bit(negate)
    local l = genid()
    self:test_emit(node.left, l, jreg, negate, lstore, dest)
    self:expr_toreg(node.right, dest, jreg)
    self.ctx:here(l)
end

-- Emit a table read: resolve the target and key through lhs_expr_emit,
-- release the temporaries, then emit the table get into "dest".
function ExpressionRule:MemberExpression(node, dest)
    local free = self.ctx.freereg
    local lhs = self:lhs_expr_emit(node)
    self.ctx.freereg = free
    self.ctx:op_tget(dest, lhs.target, lhs.key_type, lhs.key)
end

-- Emit a function declaration statement. A local declaration creates the
-- variable first and emits the function straight into its register;
-- otherwise the function is assigned to the resolved left-hand side.
function StatementRule:FunctionDeclaration(node)
    local path = node.id
    if node.locald then
        -- We avoid calling "lhs_expr_emit" on "path" because
        -- it would mark the variable as mutable.
        local vinfo = self.ctx:newvar(path.name)
        self:expr_toreg(node, vinfo.idx)
        local pc = #self.ctx.code + 1
        vinfo.startpc = pc
        vinfo.endpc = pc
    else
        local lhs = self:lhs_expr_emit(path)
        self:expr_tolhs(lhs, node)
    end
end

-- Compile a function body in a child proto and emit an FNEW of that proto
-- into "dest". self.ctx points at the child while the body is emitted and
-- is switched back to the parent afterwards.
function ExpressionRule:FunctionExpression(node, dest)
    local free = self.ctx.freereg
    local child = self.ctx:child(node.firstline, node.lastline)
    self.ctx = child
    for i=1, #node.params do
        if node.params[i].kind == 'Vararg' then
            self.ctx.flags = bit.bor(self.ctx.flags, bc.Proto.VARARG)
        else
            self.ctx:param(node.params[i].name)
        end
    end
    self:block_emit(node.body)
    self:close_proto(node.lastline)

    self.ctx = self.ctx:parent()
    self.ctx.freereg = free
    self.ctx:line(node.lastline)
    self.ctx:op_fnew(dest, child.idx)
end

ExpressionRule.FunctionDeclaration = ExpressionRule.FunctionExpression

-- Shared emitter for plain calls and method calls (use_self).
-- The callee is placed at the first free register. For a method call the
-- receiver is copied to the following register and counted as an extra
-- argument. The last argument is evaluated with MULTIRES so that a
-- multi-result expression forwards all of its values.
-- Returns `want == MULTIRES, use_tail`.
local function emit_call_expression(self, node, want, use_tail, use_self)
    local free = self.ctx.freereg

    if use_self then
        local obj = self:expr_toanyreg(node.receiver)
        self.ctx:op_move(free + 1, obj)
        self.ctx:setreg(free + 2)
        local method_type, method = self:property_tagged(node.method.name)
        self.ctx:op_tget(free, obj, method_type, method)
        self.ctx.freereg = free + 2
    else
        self:expr_tonextreg(node.callee)
    end

    local narg = #node.arguments
    for i=1, narg - 1 do
        self:expr_tonextreg(node.arguments[i])
    end
    local mres = false
    if narg > 0 then
        local lastarg = node.arguments[narg]
        mres = self:expr_tomultireg(lastarg, MULTIRES)
        self.ctx:nextreg()
    end

    if use_self then narg = narg + 1 end
    self.ctx.freereg = free
    if mres then
        -- The last argument yields multiple results: it is not part of the
        -- fixed argument count.
        if use_tail then
            self.ctx:close_uvals()
            self.ctx:op_callmt(free, narg - 1)
        else
            self.ctx:op_callm(free, want, narg - 1)
        end
    else
        if use_tail then
            self.ctx:close_uvals()
            self.ctx:op_callt(free, narg)
        else
            self.ctx:op_call(free, want, narg)
        end
    end

    return want == MULTIRES, use_tail
end

-- Plain function call: f(...).
function MultiExprRule:CallExpression(node, want, tail)
    return emit_call_expression(self, node, want, tail, false)
end

-- Method call with an implicit receiver argument: obj:method(...).
function MultiExprRule:SendExpression(node, want, tail)
    return emit_call_expression(self, node, want, tail, true)
end

-- Resolve an identifier used as assignment target. Returns a descriptor
-- tagged 'upval', 'local' or 'global'; variables found by the lookup are
-- flagged as mutable.
function LHSExpressionRule:Identifier(node)
    local info, uval = self.ctx:lookup(node.name)
    if uval then
        -- Ensure variable is marked as upvalue in proto and take
        -- upvalue index.
        info.mutable = true
        local uv = self.ctx:upval(node.name)
        return {tag = 'upval', uv = uv}
    elseif info then
        info.mutable = true
        return {tag = 'local', target = info.idx}
    else
        return {tag = 'global', name = node.name}
    end
end

-- Resolve a member expression used as assignment target: the object goes
-- into a register, the key is either evaluated (computed access) or taken
-- from the property name. Returns a descriptor tagged 'member'.
function LHSExpressionRule:MemberExpression(node)
    local target = self:expr_toanyreg(node.object)
    local key_type, key
    if node.computed then
        key_type, key = self:expr_toanyreg_tagged(node.property, EXPR_EMIT_VSB)
    else
        key_type, key = self:property_tagged(node.property.name)
    end
    return { tag = 'member', target = target, key = key, key_type = key_type }
end

-- Test of a literal value. The literal is stored into "dest" only when
-- "store" has the flag matching its truthiness; the jump to "jmp" is
-- emitted when "negate" and the value's truthiness agree.
function TestRule:Literal(node, jmp, jreg, negate, store, dest)
    local value = node.value
    if bit.band(store, store_bit(value)) ~= 0 then
        self:expr_toreg(node, dest)
    else
        jreg = self.ctx.freereg
    end
    if (negate and value) or (not negate and not value) then
        self.ctx:jump(jmp, jreg)
    end
end

-- Return the instruction name and the operand swap flag for operator "op",
-- taken from "cmpop" when "negate" is truthy and from "cmpopinv" otherwise.
local function compare_op(negate, op)
    local oper_table = negate and cmpop or cmpopinv
    local e = oper_table[op]
    return e[1], e[2]
end

-- Return true IFF the variable "store" has the EXPR_RESULT_FALSE bit
-- set. If "negate" is true check the EXPR_RESULT_TRUE bit instead.
502 | local function has_branch(store, negate) 503 | return bit.band(store, store_bit(negate)) ~= 0 504 | end 505 | 506 | function TestRule:BinaryExpression(node, jmp, jreg, negate, store, dest) 507 | local o = node.operator 508 | if cmpop[o] then 509 | local free = self.ctx.freereg 510 | local atag, a, btag, b 511 | if o == '==' or o == '~=' then 512 | atag, a = self:expr_toanyreg_tagged(node.left, EXPR_EMIT_VSNP) 513 | if atag == 'V' then 514 | btag, b = self:expr_toanyreg_tagged(node.right, EXPR_EMIT_VSNP) 515 | else 516 | btag, b = atag, a 517 | atag, a = 'V', self:expr_toanyreg(node.right) 518 | end 519 | else 520 | a = self:expr_toanyreg(node.left) 521 | b = self:expr_toanyreg(node.right) 522 | end 523 | self.ctx.freereg = free 524 | local use_imbranch = has_branch(store, negate) 525 | if use_imbranch then 526 | local test, swap = compare_op(not negate, o) 527 | local altlabel = genid() 528 | self.ctx:op_comp(test, a, btag, b, altlabel, free, swap) 529 | self.ctx:op_load(dest, negate) 530 | self.ctx:jump(jmp, jreg) 531 | self.ctx:here(altlabel) 532 | self.ctx.freereg = free 533 | else 534 | local test, swap = compare_op(negate, o) 535 | self.ctx:op_comp(test, a, btag, b, jmp, free, swap) 536 | end 537 | if has_branch(store, not negate) then 538 | self.ctx:op_load(dest, not negate) 539 | end 540 | else 541 | self:expr_test(node, jmp, jreg, negate, store, dest) 542 | end 543 | end 544 | 545 | function TestRule:UnaryExpression(node, jmp, jreg, negate, store, dest) 546 | if node.operator == 'not' and store == 0 then 547 | self:test_emit(node.argument, jmp, jreg, not negate) 548 | else 549 | self:expr_test(node, jmp, jreg, negate, store, dest or self.ctx.freereg) 550 | end 551 | end 552 | 553 | function TestRule:LogicalExpression(node, jmp, jreg, negate, store, dest) 554 | local or_operator = (node.operator == "or") 555 | local lstore = bit.band(store, store_bit(or_operator)) 556 | local imbranch = xor(negate, or_operator) 557 | if imbranch then 558 | local templ 
= genid() 559 | self:test_emit(node.left, templ, jreg, not negate, lstore, dest) 560 | self:test_emit(node.right, jmp, jreg, negate, store, dest) 561 | self.ctx:here(templ) 562 | else 563 | self:test_emit(node.left, jmp, jreg, negate, lstore, dest) 564 | self:test_emit(node.right, jmp, jreg, negate, store, dest) 565 | end 566 | end 567 | 568 | function StatementRule:CallExpression(node) 569 | self:expr_tomultireg(node, 0, false) 570 | end 571 | 572 | function StatementRule:SendExpression(node) 573 | self:expr_tomultireg(node, 0, false) 574 | end 575 | 576 | function StatementRule:LabelStatement(node) 577 | local ok, label = self.ctx:goto_label(node.label) 578 | if not ok then 579 | lang_error(label, self.chunkname, node.line) 580 | end 581 | end 582 | 583 | function StatementRule:GotoStatement(node) 584 | self.ctx:goto_jump(node.label, node.line) 585 | end 586 | 587 | function StatementRule:DoStatement(node) 588 | self:block_enter() 589 | self:block_emit(node.body) 590 | self:block_leave(node.body.lastline) 591 | end 592 | 593 | function StatementRule:IfStatement(node, root_exit) 594 | local free = self.ctx.freereg 595 | local ncons = #node.tests 596 | -- Count the number of branches, including the "else" branch. 597 | local count = node.alternate and ncons + 1 or ncons 598 | local local_exit = count > 1 and genid() 599 | -- Set the exit point to the extern exit if given or set to local 600 | -- exit (potentially false). 601 | local exit = root_exit or local_exit 602 | 603 | for i = 1, ncons do 604 | local test, block = node.tests[i], node.cons[i] 605 | local next_test = genid() 606 | -- Set the exit point to jump on at the end of for this block. 607 | -- If this is the last branch (count == 1) set to false. 
-- Compile "local n1, n2, ... = e1, e2, ...".
-- The values are produced in consecutive registers starting at the current
-- free register and the names are then bound to those registers.
--  * Fewer expressions than names: the last expression is asked for all the
--    remaining values (or the names are set to nil when there is no
--    expression at all).
--  * More expressions than names: the surplus expressions are still
--    evaluated, because they may have side effects (e.g. "local a = 1, f()"
--    must call f), but their values are dropped.
function StatementRule:LocalDeclaration(node)
    local nvars = #node.names
    local nexps = #node.expressions
    local base = self.ctx.freereg
    local slots = nvars
    for i = 1, nexps - 1 do
        if slots == 0 then break end
        self:expr_tonextreg(node.expressions[i])
        slots = slots - 1
    end

    if slots > 0 then
        if nexps > 0 then
            self:expr_tomultireg(node.expressions[nexps], slots)
        else
            self.ctx:op_nils(base, slots)
        end
        self.ctx:nextreg(slots)
    else
        -- Every name already has its value: evaluate what is left in a
        -- scratch register above the variables and discard the result.
        local scratch = self.ctx.freereg
        for i = nvars + 1, nexps do
            self:expr_tonextreg(node.expressions[i])
            self.ctx.freereg = scratch
        end
    end

    for i = 1, nvars do
        local lhs = node.names[i]
        self.ctx:newvar(lhs.name, base + (i - 1))
    end
end
-- Compile a (possibly multiple) assignment "v1, v2, ... = e1, e2, ...".
-- All the left-hand sides are resolved first, then the expressions are
-- evaluated from left to right and finally the stores are emitted from
-- right to left.
-- Expressions in excess of the number of variables are evaluated as well,
-- since they may have side effects, and their values are dropped.
function StatementRule:AssignmentExpression(node)
    local free = self.ctx.freereg
    local nvars = #node.left
    local nexps = #node.right

    local lhs = { }
    for i = 1, nvars do
        local va = self:lhs_expr_emit(node.left[i])
        if va.tag == 'local' then
            -- A local assigned here may be used as table or key by an
            -- earlier member target: rename it if needed.
            assign_hazard(self, lhs, va.target)
        end
        lhs[i] = va
    end

    -- Number of variables still waiting for a value.
    local slots = nvars
    local exprs = { }
    for i = 1, nexps - 1 do
        if slots > 0 then
            -- LuaJIT compatibility: use a temporary register even if the
            -- LHS is not an immediate local variable.
            exprs[i] = self:expr_tonextreg(node.right[i])
            slots = slots - 1
        else
            -- Surplus expression: evaluate it and drop the value.
            local scratch = self.ctx.freereg
            self:expr_tonextreg(node.right[i])
            self.ctx.freereg = scratch
        end
    end

    local last = node.right[nexps]
    if slots == 1 then
        -- As many expressions as variables: the last expression is stored
        -- directly into the last variable.
        self:expr_tolhs(lhs[nexps], last)
    elseif slots > 1 then
        -- Fewer expressions than variables: the last expression has to
        -- provide all the missing values.
        local exp_base = self.ctx.freereg
        self:expr_tomultireg(last, slots)
        for k = slots - 1, 0, -1 do
            self:assign(lhs[nexps + k], exp_base + k)
        end
    else
        -- More expressions than variables: the last one is a surplus
        -- expression too. Evaluate it and drop the value.
        local scratch = self.ctx.freereg
        self:expr_tonextreg(last)
        self.ctx.freereg = scratch
    end

    for i = nvars - slots, 1, -1 do
        self:assign(lhs[i], exprs[i])
    end

    self.ctx.freereg = free
end
-- Compile a generic "for n1, n2, ... in explist do ... end" loop.
-- Three values (func, state, ctl) are produced in the registers starting
-- at the current free register; the loop variables are bound to the
-- registers that follow them.
function StatementRule:ForInStatement(node)
    local base = self.ctx.freereg
    local first_var = base + 3
    local line = node.line

    local loop, exit = genid(), genid()

    local names = node.namelist.names
    local explist = node.explist

    -- At most two leading expressions get a register of their own; the
    -- expression that follows them supplies the values still missing.
    local leading = 0
    while leading < 2 and leading < #explist - 1 do
        leading = leading + 1
        self:expr_tonextreg(explist[leading])
    end
    self:expr_tomultireg(explist[leading + 1], 3 - leading) -- func, state, ctl
    self.ctx:setreg(first_var)
    local forivinfo = self.ctx:forivars(0x04)
    self.ctx:jump(loop, self.ctx.freereg)

    self:loop_enter(exit, base)

    local nvars = #names
    for i = 1, nvars do
        self.ctx:newvar(names[i].name, first_var + i - 1)
        self.ctx:setreg(first_var + i)
    end

    local body_top = self.ctx:here(genid())
    self:block_emit(node.body)
    self:loop_leave(node.lastline)

    -- Iterator call and loop-back instruction, both attributed to the
    -- line of the "for" statement.
    self.ctx:here(loop)
    self.ctx:op_iterc(first_var, nvars)
    self.ctx:setpcline(line)
    self.ctx:op_iterl(first_var, body_top)
    self.ctx:setpcline(line)
    forivinfo.endpc = #self.ctx.code
    self.ctx:here(exit)
    self.ctx.freereg = base
end
-- Emit the store of register "expr" into the resolved left-hand side "lhs".
-- The instruction is attributed to the line recorded in "lhs"; the current
-- line is restored afterwards.
function self:assign(lhs, expr)
    local ctx = self.ctx
    local previous_line = ctx.currline
    ctx:line(lhs.line)

    local tag = lhs.tag
    if tag == 'local' then
        mov_toreg(ctx, lhs.target, expr)
    elseif tag == 'upval' then
        ctx:op_uset(lhs.uv, 'V', expr)
    elseif tag == 'member' then
        -- SET instructions with a Primitive "P" index are not accepted.
        -- The method self:lhs_expr_emit never generates such requests.
        assert(lhs.key_type ~= 'P', "invalid assignment instruction")
        ctx:op_tset(lhs.target, lhs.key_type, lhs.key, expr)
    else
        -- Any other tag is stored through a global set by name.
        ctx:op_gset(expr, lhs.name)
    end

    ctx:line(previous_line)
end
-- Emit code to test an expression as a boolean value and jump to "jmp"
-- accordingly. Expressions whose truth value is known at compile time
-- produce at most a load and an unconditional jump.
-- The free register counter is restored before returning.
function self:expr_test(node, jmp, jreg, negate, store, dest)
    local saved_free = self.ctx.freereg
    local known = boolean_const_eval(node)
    if known == nil then
        -- Truth value only known at run time: evaluate and test.
        local reg = self:expr_toanyreg(node)
        if store == 0 then
            self.ctx:op_test(negate, reg, jmp, self.ctx.freereg)
        else
            cond_mov_toreg(self.ctx, negate, dest, reg, jmp, self.ctx.freereg)
        end
    else
        local wants_store = bit.band(store, store_bit(known)) ~= 0
        if wants_store then
            self.ctx:op_load(dest, known)
        end
        if xor(negate, not known) then
            self.ctx:jump(jmp, jreg)
        end
    end
    self.ctx.freereg = saved_free
end
-- Emit code to compute the "node" expression and store the result in the
-- register "dest". "jreg" is forwarded to the expression rule and defaults
-- to the current free register.
-- Return the tail-call flag reported by a multi-value rule, false in all
-- the other cases.
function self:expr_toreg(node, dest, jreg, tail)
    if node.line then self.ctx:line(node.line) end

    -- Constant expressions are loaded directly.
    local const_val = const_eval(node)
    if const_val then
        self.ctx:op_load(dest, const_val)
        return false -- no tail call
    end

    local kind = node.kind
    local single_rule = ExpressionRule[kind]
    if single_rule then
        single_rule(self, node, dest, jreg or self.ctx.freereg)
        return false -- no tail call
    end

    local multi_rule = MultiExprRule[kind]
    if not multi_rule then
        error("Cannot find an ExpressionRule for " .. kind)
    end
    -- Ask the multi-value rule for exactly one value, then move it from the
    -- free register noted before the call into "dest".
    local base = self.ctx.freereg
    local _, tailcall = multi_rule(self, node, 1, base == dest and tail)
    mov_toreg(self.ctx, dest, base)
    return tailcall
end
-- Like "expr_toanyreg" but the operand may also be an immediate constant.
-- Return a tag followed by the value itself. "emit" selects which kinds of
-- immediate operands are allowed (fields byte, number, primitive, string);
-- when none applies the expression is computed in a register and tagged 'V'.
function self:expr_toanyreg_tagged(node, emit)
    local const_val = const_eval(node)
    if const_val then
        if emit.byte and is_byte_number(const_val) then
            return 'B', const_val
        end
        if emit.number then
            return 'N', self.ctx:const(const_val)
        end
    end

    if node.kind == 'Literal' then
        local value = node.value
        local value_type = type(value)
        local is_primitive = value_type == 'nil' or value_type == 'boolean'
        if emit.primitive and is_primitive then
            return 'P', self.ctx:kpri(value)
        end
        if emit.string and value_type == 'string' then
            return self:property_tagged(value)
        end
        -- Any other literal is handled like a generic expression.
    end

    return 'V', self:expr_toanyreg(node)
end
-- Emit code to evaluate "expr" and store it in the given LHS.
-- Locals are computed directly in their own register, upvalues may take a
-- tagged immediate operand, everything else goes through a register and
-- "assign". The free register counter is restored before returning.
function self:expr_tolhs(lhs, expr)
    local saved_free = self.ctx.freereg
    local target_kind = lhs.tag
    if target_kind == 'local' then
        self:expr_toreg(expr, lhs.target)
    elseif target_kind == 'upval' then
        local tag, operand = self:expr_toanyreg_tagged(expr, EXPR_EMIT_VSNP)
        self.ctx:op_uset(lhs.uv, tag, operand)
        self.ctx:setpcline(lhs.line)
    else
        local reg = self:expr_toanyreg(expr)
        self:assign(lhs, reg)
    end
    self.ctx.freereg = saved_free
end
-- Return the text used to display a token. Tokens named "TK_xxx" map to
-- their entry in TokenSymbol when there is one, otherwise to the name
-- without the "TK_" prefix. Any other token is returned unchanged.
local function token2str(tok)
    if not string.find(tok, "^TK_") then
        return tok
    end
    local symbol = TokenSymbol[tok]
    if symbol then
        return symbol
    end
    return string.sub(tok, 4)
end
-- Ask the reader function for the next chunk of source and return its
-- first character, or END_OF_STREAM when there is no more input.
-- An empty string is treated like a missing chunk: popping a character
-- from it would make the current character an empty string, which the
-- character classification helpers cannot handle.
local function fillbuf(ls)
    local data = ls:read_func()
    if not data or data == '' then
        return END_OF_STREAM
    end
    ls.data, ls.n, ls.p = data, #data, 1
    return pop(ls)
end
-- Scan a long-bracket separator: a '[' or ']' followed by any number of
-- '=' characters. Every scanned character is saved in the token buffer.
-- Return the number of '=' found if the next character is the same bracket
-- again, otherwise a negative value (-count - 1).
local function skip_sep(ls)
    local bracket = ls.current
    assert(bracket == '[' or bracket == ']')
    save_and_next(ls)

    local level = 0
    while ls.current == '=' do
        save_and_next(ls)
        level = level + 1
    end

    if ls.current == bracket then
        return level
    end
    return -level - 1
end
-- Scan a numeric literal starting at the current character and return its
-- value. The text of the literal is accumulated, in lower case, in the
-- token buffer. Three forms are recognized:
--   * a trailing 'i' gives a complex number with zero real part;
--   * a trailing 'll' gives a 64-bit integer (unsigned with 'ull');
--   * anything else is converted with tonumber.
-- A "malformed number" error is raised if the text cannot be converted.
local function lex_number(ls)
    local lower = string.lower
    -- Exponent marker: 'e' for decimal literals, 'p' for hexadecimal ones.
    local xp = 'e'
    local c = ls.current
    if c == '0' then
        save_and_next(ls)
        local xc = ls.current
        if xc == 'x' or xc == 'X' then xp = 'p' end
    end
    -- A sign is part of the literal only right after the exponent marker.
    while char_isident(ls.current) or ls.current == '.' or
        ((ls.current == '-' or ls.current == '+') and lower(c) == xp) do
        c = lower(ls.current)
        save(ls, c)
        nextchar(ls)
    end
    local str = ls.save_buf
    local x
    if strsub(str, -1, -1) == 'i' then
        local img = tonumber(strsub(str, 1, -2))
        if img then x = complex(0, img) end
    elseif strsub(str, -2, -1) == 'll' then
        -- The 64-bit builders stop at the first character that is not a
        -- digit and ignore what follows, so the whole literal is validated
        -- here: digits only, then an optional 'u', then 'll'. This rejects
        -- texts like "1e5ll", "12abll" or "0xll".
        if xp == 'p' then
            if string.match(str, '^0x%x+u?ll$') then
                x = build_64hex(strnumdump(str))
            end
        elseif string.match(str, '^%d+u?ll$') then
            x = build_64int(strnumdump(str))
        end
    else
        x = tonumber(str)
    end
    if x then
        return x
    else
        lex_error(ls, 'TK_number', "malformed number")
    end
end
-- Return the numeric value (0-15) of the hexadecimal digit "c", or nothing
-- if "c" does not start with a hexadecimal digit.
local function hex_char(c)
    if not string.match(c, '^%x') then
        return
    end
    -- The low four bits give the value of a decimal digit; for letters
    -- ('a'-'f', 'A'-'F') they give 1-6, hence the offset of 9.
    local low_bits = band(strbyte(c), 15)
    if char_isdigit(c) then
        return low_bits
    end
    return low_bits + 9
end
-- Scan a quoted string whose opening delimiter is the current character.
-- Both delimiters are saved in the token buffer and stripped from the
-- returned value. Raise "unfinished string" if the end of the stream or a
-- raw newline is found before the closing delimiter.
local function read_string(ls, delim)
    save_and_next(ls) -- opening delimiter
    local c = ls.current
    while c ~= delim do
        if c == END_OF_STREAM then
            lex_error(ls, 'TK_eof', "unfinished string")
        elseif c == '\n' or c == '\r' then
            lex_error(ls, 'TK_string', "unfinished string")
        elseif c == '\\' then
            read_escape_char(ls)
        else
            save_and_next(ls)
        end
        c = ls.current
    end
    save_and_next(ls) -- skip delimiter
    return get_string(ls, 1, 1)
end
378 | return 'TK_number', lex_number(ls) 379 | end 380 | repeat 381 | save_and_next(ls) 382 | until not char_isident(ls.current) 383 | local s = get_string(ls, 0, 0) 384 | local reserved = ReservedKeyword[s] 385 | if reserved then 386 | return 'TK_' .. s 387 | else 388 | return 'TK_name', s 389 | end 390 | end 391 | if current == '\n' or current == '\r' then 392 | inclinenumber(ls) 393 | elseif current == ' ' or current == '\t' or current == '\b' or current == '\f' then 394 | savespace_and_next(ls) 395 | -- nextchar(ls) 396 | elseif current == '-' then 397 | nextchar(ls) 398 | if ls.current ~= '-' then return '-' end 399 | -- else is a comment 400 | nextchar(ls) 401 | spaceadd(ls, '--') 402 | if ls.current == '[' then 403 | local sep = skip_sep(ls) 404 | resetbuf_tospace(ls) -- `skip_sep' may dirty the buffer 405 | if sep >= 0 then 406 | read_long_string(ls, sep, false) -- long comment 407 | resetbuf_tospace(ls) 408 | else 409 | skip_line(ls) 410 | end 411 | else 412 | skip_line(ls) 413 | end 414 | elseif current == '[' then 415 | local sep = skip_sep(ls) 416 | if sep >= 0 then 417 | local str = read_long_string(ls, sep, true) 418 | return 'TK_string', str 419 | elseif sep == -1 then 420 | return '[' 421 | else 422 | lex_error(ls, 'TK_string', "delimiter error") 423 | end 424 | elseif current == '=' then 425 | nextchar(ls) 426 | if ls.current ~= '=' then return '=' else nextchar(ls); return 'TK_eq' end 427 | elseif current == '<' then 428 | nextchar(ls) 429 | if ls.current ~= '=' then return '<' else nextchar(ls); return 'TK_le' end 430 | elseif current == '>' then 431 | nextchar(ls) 432 | if ls.current ~= '=' then return '>' else nextchar(ls); return 'TK_ge' end 433 | elseif current == '~' then 434 | nextchar(ls) 435 | if ls.current ~= '=' then return '~' else nextchar(ls); return 'TK_ne' end 436 | elseif current == '*' then 437 | nextchar(ls) 438 | if ls.current ~= '*' then return '*' else nextchar(ls); return '**' end 439 | elseif current == '^' then 440 | 
-- Create a lexer state reading the source through "read_func" and
-- reporting errors with "chunkname".
-- A leading UTF-8 BOM (if buffered) and a first line starting with '#'
-- are skipped. The returned table always has the Lexer methods attached,
-- also when the '#' line runs up to the end of the stream.
local function lex_setup(read_func, chunkname)
    local ls = {
        n = 0,
        tklookahead = 'TK_eof', -- No look-ahead token.
        linenumber = 1,
        lastline = 1,
        read_func = read_func,
        chunkname = chunkname,
        space_buf = ''
    }
    nextchar(ls)
    if ls.current == '\xef' and ls.n >= 2 and
        byte(ls, 0) == '\xbb' and byte(ls, 1) == '\xbf' then -- Skip UTF-8 BOM (if buffered).
        skip(ls, 2)
        nextchar(ls)
    end
    if ls.current == '#' then
        -- Skip the header line, stopping at its newline or at the end of
        -- the stream, whichever comes first.
        repeat
            nextchar(ls)
        until ls.current == END_OF_STREAM or curr_is_newline(ls)
        if ls.current ~= END_OF_STREAM then
            inclinenumber(ls)
        end
    end
    return setmetatable(ls, LexerClass)
end
39 | local id = ast:var_declare(name) 40 | return func_decl(id, body, args, proto.varargs, true, proto.firstline, proto.lastline) 41 | end 42 | 43 | function AST.function_decl(ast, path, args, body, proto) 44 | return func_decl(path, body, args, proto.varargs, false, proto.firstline, proto.lastline) 45 | end 46 | 47 | function AST.chunk(ast, body, chunkname, firstline, lastline) 48 | return build("Chunk", { body = body, chunkname = chunkname, firstline = firstline, lastline = lastline }) 49 | end 50 | 51 | function AST.local_decl(ast, vlist, exps, line) 52 | local ids = {} 53 | for k = 1, #vlist do 54 | ids[k] = ast:var_declare(vlist[k]) 55 | end 56 | return build("LocalDeclaration", { names = ids, expressions = exps, line = line }) 57 | end 58 | 59 | function AST.assignment_expr(ast, vars, exps, line) 60 | return build("AssignmentExpression", { left = vars, right = exps, line = line }) 61 | end 62 | 63 | function AST.assignment_algebra_expr(ast, vars, exps, line) 64 | return build("AssignmentAlgebraExpression", { left = vars, right = exps, line = line }) 65 | end 66 | 67 | function AST.expr_index(ast, v, index, line) 68 | return build("MemberExpression", { object = v, property = index, computed = true, line = line }) 69 | end 70 | 71 | function AST.expr_algebra_index(ast, v, line) 72 | return build("IndexAlgebraExpression", { object = v, line = line }) 73 | end 74 | 75 | function AST.expr_property(ast, v, prop, line) 76 | local index = ident(prop, line) 77 | return build("MemberExpression", { object = v, property = index, computed = false, line = line }) 78 | end 79 | 80 | function AST.literal(ast, val) 81 | return build("Literal", { value = val }) 82 | end 83 | 84 | function AST.expr_vararg(ast) 85 | return build("Vararg", { }) 86 | end 87 | 88 | function AST.expr_brackets(ast, expr) 89 | expr.bracketed = true 90 | return expr 91 | end 92 | 93 | function AST.set_expr_last(ast, expr) 94 | if expr.bracketed and does_multi_return(expr) then 95 | expr.bracketed = nil 
96 | return build("ExpressionValue", { value = expr }) 97 | else 98 | return expr 99 | end 100 | end 101 | 102 | function AST.expr_table(ast, keyvals, line) 103 | return build("Table", { keyvals = keyvals, line = line }) 104 | end 105 | 106 | function AST.expr_unop(ast, op, v, line) 107 | return build("UnaryExpression", { operator = op, argument = v, line = line }) 108 | end 109 | 110 | function AST.expr_algebra_unop(ast, op, v, line) 111 | return build("UnaryAlgebraExpression", { operator = op, argument = v, line = line }) 112 | end 113 | 114 | local function concat_append(ts, node) 115 | local n = #ts 116 | if node.kind == "ConcatenateExpression" then 117 | for k = 1, #node.terms do ts[n + k] = node.terms[k] end 118 | else 119 | ts[n + 1] = node 120 | end 121 | end 122 | 123 | function AST.expr_binop(ast, op, expa, expb, line) 124 | local binop_body = (op ~= '..' and { operator = op, left = expa, right = expb, line = line }) 125 | if binop_body then 126 | if op == 'and' or op == 'or' then 127 | return build("LogicalExpression", binop_body) 128 | else 129 | return build("BinaryExpression", binop_body) 130 | end 131 | else 132 | local terms = { } 133 | concat_append(terms, expa) 134 | concat_append(terms, expb) 135 | return build("ConcatenateExpression", { terms = terms, line = expa.line }) 136 | end 137 | end 138 | 139 | function AST.expr_algebra_binop(ast, op, expa, expb, line) 140 | local binop_body = (op ~= '..' 
and { operator = op, left = expa, right = expb, line = line }) 141 | if binop_body then 142 | if op == 'and' or op == 'or' then 143 | error('not yet implemented') 144 | else 145 | return build("BinaryAlgebraExpression", binop_body) 146 | end 147 | else 148 | error('not yet implemented') 149 | end 150 | end 151 | 152 | function AST.identifier(ast, name) 153 | return ident(name) 154 | end 155 | 156 | function AST.expr_method_call(ast, v, key, args, line) 157 | local m = ident(key) 158 | return build("SendExpression", { receiver = v, method = m, arguments = args, line = line }) 159 | end 160 | 161 | function AST.expr_function_call(ast, v, args, line) 162 | return build("CallExpression", { callee = v, arguments = args, line = line }) 163 | end 164 | 165 | function AST.return_stmt(ast, exps, line) 166 | return build("ReturnStatement", { arguments = exps, line = line }) 167 | end 168 | 169 | function AST.break_stmt(ast, line) 170 | return build("BreakStatement", { line = line }) 171 | end 172 | 173 | function AST.label_stmt(ast, name, line) 174 | return build("LabelStatement", { label = name, line = line }) 175 | end 176 | 177 | function AST.new_statement_expr(ast, expr, line) 178 | return build("ExpressionStatement", { expression = expr, line = line }) 179 | end 180 | 181 | function AST.if_stmt(ast, tests, cons, else_branch, line) 182 | return build("IfStatement", { tests = tests, cons = cons, alternate = else_branch, line = line }) 183 | end 184 | 185 | function AST.do_stmt(ast, body, line, lastline) 186 | return build("DoStatement", { body = body, line = line, lastline = lastline}) 187 | end 188 | 189 | function AST.while_stmt(ast, test, body, line, lastline) 190 | return build("WhileStatement", { test = test, body = body, line = line, lastline = lastline }) 191 | end 192 | 193 | function AST.repeat_stmt(ast, test, body, line, lastline) 194 | return build("RepeatStatement", { test = test, body = body, line = line, lastline = lastline }) 195 | end 196 | 197 | function 
AST.for_stmt(ast, var, init, last, step, body, line, lastline) 198 | local for_init = build("ForInit", { id = var, value = init, line = line }) 199 | return build("ForStatement", { init = for_init, last = last, step = step, body = body, line = line, lastline = lastline }) 200 | end 201 | 202 | function AST.for_iter_stmt(ast, vars, exps, body, line, lastline) 203 | local names = build("ForNames", { names = vars, line = line }) 204 | return build("ForInStatement", { namelist = names, explist = exps, body = body, line = line, lastline = lastline }) 205 | end 206 | 207 | function AST.goto_stmt(ast, name, line) 208 | return build("GotoStatement", { label = name, line = line }) 209 | end 210 | 211 | local function new_scope(parent_scope) 212 | return { 213 | vars = { }, 214 | parent = parent_scope, 215 | } 216 | end 217 | 218 | function AST.var_declare(ast, name) 219 | local id = ident(name) 220 | ast.current.vars[name] = true 221 | return id 222 | end 223 | 224 | function AST.fscope_begin(ast) 225 | ast.current = new_scope(ast.current) 226 | end 227 | 228 | function AST.fscope_end(ast) 229 | ast.current = ast.current.parent 230 | end 231 | 232 | local ASTClass = { __index = AST } 233 | 234 | local function new_ast() 235 | return setmetatable({ }, ASTClass) 236 | end 237 | 238 | return { New = new_ast } 239 | -------------------------------------------------------------------------------- /luacode-generator.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- luacode-generator.lua 3 | -- 4 | -- This file is part of the LuaJIT Language Toolkit. 5 | -- 6 | -- Module to generate the Lua code that corresponds to a given Lua AST Tree. 7 | -- Can be used as an alternative to the bytecode generator. 
local operator = require("sci-lang.operator")

local strbyte, strsub = string.byte, string.sub
local concat = table.concat
local format = string.format

local LuaReservedKeyword = {
    ['and'] = 1, ['break'] = 2, ['do'] = 3, ['else'] = 4, ['elseif'] = 5,
    ['end'] = 6, ['false'] = 7, ['for'] = 8, ['function'] = 9, ['goto'] = 10,
    ['if'] = 11, ['in'] = 12, ['local'] = 13, ['nil'] = 14, ['not'] = 15,
    ['or'] = 16, ['repeat'] = 17, ['return'] = 18, ['then'] = 19,
    ['true'] = 20, ['until'] = 21, ['while'] = 22,
}

local ASCII_0, ASCII_9 = 48, 57
local ASCII_a, ASCII_z = 97, 122
local ASCII_A, ASCII_Z = 65, 90

-- True if the one-character string `c` is an ASCII letter or '_'.
local function char_isletter(c)
    local b = strbyte(c)
    if b >= ASCII_a and b <= ASCII_z then
        return true
    elseif b >= ASCII_A and b <= ASCII_Z then
        return true
    else
        return (c == '_')
    end
end

-- True if the one-character string `c` is an ASCII digit.
local function char_isdigit(c)
    local b = strbyte(c)
    return b >= ASCII_0 and b <= ASCII_9
end

-- Control characters that have a symbolic escape sequence.
local ControlEscape = {
    ['\a'] = [[\a]], ['\b'] = [[\b]], ['\f'] = [[\f]], ['\n'] = [[\n]],
    ['\r'] = [[\r]], ['\t'] = [[\t]], ['\v'] = [[\v]],
}

-- Escape sequence for the control character `c`.
-- Numeric escapes always use three digits: a shorter form such as "\1"
-- followed by a literal digit ("\1" .. "2") would be read back as "\12".
local function replace_cc(c)
    return ControlEscape[c] or format("\\%03d", strbyte(c))
end

-- Escape `s` for use between double quotes in Lua source.
-- Quotes and backslashes are handled first, so the backslashes introduced for
-- control characters are not escaped again.
local function escape(s)
    s = string.gsub(s, "[\"\\]", "\\%1")
    -- Parentheses drop gsub's second result (the substitution count).
    return (string.gsub(s, "%c", replace_cc))
end

-- Source text for the number `n` that reads back as the same value.
-- tostring keeps 14 significant digits, which is not enough for every double,
-- and renders infinities and NaN as words that are not Lua numerals; those are
-- emitted as parenthesized divisions instead.
local function number_literal(n)
    if n ~= n then
        return "(0/0)"
    elseif n == 1/0 then
        return "(1/0)"
    elseif n == -1/0 then
        return "(-1/0)"
    end
    local s = format("%.14g", n)
    if tonumber(s) ~= n then
        s = format("%.17g", n)
    end
    return s
end

-- Emitters indexed by node kind. Statement rules append lines to the current
-- proto; expression rules return the source text followed by its priority
-- (left priority, and optionally a distinct right priority).
local StatementRule = { }
local ExpressionRule = { }

local function is_string(node)
    return node.kind == "Literal" and type(node.value) == "string"
end

local function is_const(node, val)
    return node.kind == "Literal" and node.value == val
end

local function is_literal(node)
    local k = node.kind
    return (k == "Literal" or k == "Table")
end

-- True if `str` can be written as a bare name: a letter or '_' followed by
-- letters, digits or '_', and not a reserved word.
local function string_is_ident(str)
    local c = strsub(str, 1, 1)
    if c == '' or not char_isletter(c) then
        return false
    end
    for k = 2, #str do
        c = strsub(str, k, k)
        if not char_isletter(c) and not char_isdigit(c) then
            return false
        end
    end
    return not LuaReservedKeyword[str]
end

-- Join `ls` with ", ", mapping each element through `f` first when given.
local function comma_sep_list(ls, f)
    local strls
    if f then
        strls = { }
        for k = 1, #ls do strls[k] = f(ls[k]) end
    else
        strls = ls
    end
    return concat(strls, ", ")
end

local function as_parameter(node)
    return node.kind == "Vararg" and "..." or node.name
end

function ExpressionRule:Identifier(node)
    return node.name, operator.ident_priority
end

function ExpressionRule:Literal(node)
    local val = node.value
    local str
    if type(val) == "string" then
        str = format("\"%s\"", escape(val))
    elseif type(val) == "number" then
        str = number_literal(val)
    else
        str = tostring(val)
    end
    return str, operator.ident_priority
end

function ExpressionRule:MemberExpression(node)
    local object, prio = self:expr_emit(node.object)
    -- Literals and table constructors cannot be indexed without parentheses.
    if prio < operator.ident_priority or is_literal(node.object) then
        object = "(" .. object .. ")"
    end
    local exp
    if node.computed then
        local prop = self:expr_emit(node.property)
        exp = format("%s[%s]", object, prop)
    else
        exp = format("%s.%s", object, node.property.name)
    end
    return exp, operator.ident_priority
end

function ExpressionRule:Vararg()
    return "...", operator.ident_priority
end

-- The result is already parenthesized, so it binds like an identifier.
-- (No priority used to be returned here, unlike every other expression rule.)
function ExpressionRule:ExpressionValue(node)
    return "(" .. self:expr_emit(node.value) .. ")", operator.ident_priority
end

function ExpressionRule:BinaryExpression(node)
    local oper = node.operator
    local lprio = operator.left_priority(oper)
    local rprio = operator.right_priority(oper)
    local a, alprio, arprio = self:expr_emit(node.left)
    local b, blprio, brprio = self:expr_emit(node.right)
    -- Rules that return a single priority use it on both sides.
    if not arprio then arprio = alprio end
    if not brprio then brprio = blprio end
    local ap = arprio <  lprio and format("(%s)", a) or a
    local bp = blprio <= rprio and format("(%s)", b) or b
    return format("%s %s %s", ap, oper, bp), lprio, rprio
end

function ExpressionRule:UnaryExpression(node)
    local arg, arg_prio = self:expr_emit(node.argument)
    local op_prio = operator.unary_priority(node.operator)
    if arg_prio < op_prio then arg = format("(%s)", arg) end
    return format("%s %s", node.operator, arg), op_prio
end

ExpressionRule.LogicalExpression = ExpressionRule.BinaryExpression

function ExpressionRule:ConcatenateExpression(node)
    local ls = { }
    local cat_prio = operator.left_priority("..")
    for k = 1, #node.terms do
        local kprio
        ls[k], kprio = self:expr_emit(node.terms[k])
        if kprio < cat_prio then ls[k] = format("(%s)", ls[k]) end
    end
    return concat(ls, " .. "), cat_prio
end

-- Each entry of node.keyvals is { value, key }; key is nil for array items.
function ExpressionRule:Table(node)
    local hash = { }
    for i = 1, #node.keyvals do
        local kv = node.keyvals[i]
        local val = self:expr_emit(kv[1])
        local key = kv[2]
        if key then
            if is_string(key) and string_is_ident(key.value) then
                hash[i] = format("%s = %s", key.value, val)
            else
                hash[i] = format("[%s] = %s", self:expr_emit(key), val)
            end
        else
            hash[i] = format("%s", val)
        end
    end
    local content = ""
    if #hash > 0 then
        content = comma_sep_list(hash)
    end
    return "{" .. content .. "}", operator.ident_priority
end

function ExpressionRule:CallExpression(node)
    local callee, prio = self:expr_emit(node.callee)
    if prio < operator.ident_priority then
        callee = "(" .. callee .. ")"
    end
    local exp = format("%s(%s)", callee, self:expr_list(node.arguments))
    return exp, operator.ident_priority
end

function ExpressionRule:SendExpression(node)
    local rec, prio = self:expr_emit(node.receiver)
    if prio < operator.ident_priority or is_literal(node.receiver) then
        rec = "(" .. rec .. ")"
    end
    local method = node.method.name
    local exp = format("%s:%s(%s)", rec, method, self:expr_list(node.arguments))
    return exp, operator.ident_priority
end

-- The function is emitted into a child proto starting at indent 0, then merged
-- into the enclosing proto, which re-indents every line.
function StatementRule:FunctionDeclaration(node)
    self:proto_enter(0)
    local name = self:expr_emit(node.id)
    local header = format("function %s(%s)", name, comma_sep_list(node.params, as_parameter))
    if node.locald then
        header = "local " .. header
    end
    self:add_section(header, node.body)
    local child_proto = self:proto_leave()
    self.proto:merge(child_proto)
end

-- Returned with priority 0, so any enclosing operator parenthesizes it.
function ExpressionRule:FunctionExpression(node)
    self:proto_enter()
    local header = format("function(%s)", comma_sep_list(node.params, as_parameter))
    self:add_section(header, node.body)
    local child_proto = self:proto_leave()
    return child_proto:inline(), 0
end

function StatementRule:CallExpression(node)
    local line = self:expr_emit(node)
    self:add_line(line)
end

function StatementRule:ForStatement(node)
    local init = node.init
    local istart = self:expr_emit(init.value)
    local iend = self:expr_emit(node.last)
    local header
    -- A literal step of 1 is the default and is left out.
    if node.step and not is_const(node.step, 1) then
        local step = self:expr_emit(node.step)
        header = format("for %s = %s, %s, %s do", init.id.name, istart, iend, step)
    else
        header = format("for %s = %s, %s do", init.id.name, istart, iend)
    end
    self:add_section(header, node.body)
end

function StatementRule:ForInStatement(node)
    local vars = comma_sep_list(node.namelist.names, as_parameter)
    local explist = self:expr_list(node.explist)
    local header = format("for %s in %s do", vars, explist)
    self:add_section(header, node.body)
end

function StatementRule:DoStatement(node)
    self:add_section("do", node.body)
end

function StatementRule:WhileStatement(node)
    local test = self:expr_emit(node.test)
    local header = format("while %s do", test)
    self:add_section(header, node.body)
end

function StatementRule:RepeatStatement(node)
    self:add_section("repeat", node.body, true)
    local test = self:expr_emit(node.test)
    local until_line = format("until %s", test)
    self:add_line(until_line)
end

function StatementRule:BreakStatement()
    self:add_line("break")
end

function StatementRule:IfStatement(node)
    local ncons = #node.tests
    for i = 1, ncons do
        local header_tag = i == 1 and "if" or "elseif"
        local test = self:expr_emit(node.tests[i])
        local header = format("%s %s then", header_tag, test)
        self:add_section(header, node.cons[i], true)
    end
    if node.alternate then
        self:add_section("else", node.alternate, true)
    end
    self:add_line("end")
end

function StatementRule:LocalDeclaration(node)
    local line
    local names = comma_sep_list(node.names, as_parameter)
    if #node.expressions > 0 then
        line = format("local %s = %s", names, self:expr_list(node.expressions))
    else
        line = format("local %s", names)
    end
    self:add_line(line)
end

function StatementRule:AssignmentExpression(node)
    local line = format("%s = %s", self:expr_list(node.left), self:expr_list(node.right))
    self:add_line(line)
end

function StatementRule:Chunk(node)
    self:list_emit(node.body)
end

function StatementRule:ExpressionStatement(node)
    local line = self:expr_emit(node.expression)
    self:add_line(line)
end

function StatementRule:ReturnStatement(node)
    -- A bare return is emitted without a trailing space.
    if #node.arguments == 0 then
        self:add_line("return")
    else
        self:add_line(format("return %s", self:expr_list(node.arguments)))
    end
end

function StatementRule:LabelStatement(node)
    self:add_line("::" .. node.label .. "::")
end

function StatementRule:GotoStatement(node)
    self:add_line("goto " .. node.label)
end

-- Join the proto's lines into one string, for embedding in an expression.
local function proto_inline(proto)
    -- remove leading whitespaces from first line
    if #proto.code > 0 then
        proto.code[1] = string.gsub(proto.code[1], "^%s*", "")
    end
    return concat(proto.code, "\n")
end

-- Append the lines of `child` to `proto`, indented at proto's current level.
local function proto_merge(proto, child)
    local indent_str = string.rep("    ", proto.indent)
    for k = 1, #child.code do
        proto.code[#proto.code + 1] = indent_str .. child.code[k]
    end
end

-- New line buffer. The indent level is `indent` when given, else the parent's
-- level, else 0.
local function proto_new(parent, indent)
    local ind = 0
    if indent then
        ind = indent
    elseif parent then
        ind = parent.indent
    end
    local proto = { code = { }, indent = ind, parent = parent }
    proto.inline = proto_inline
    proto.merge = proto_merge
    return proto
end

-- Generate Lua source text for the AST `tree` and return it as one string.
-- `name` is accepted but not used.
local function generate(tree, name)

    local self = { line = 0 }
    self.proto = proto_new()
    self.chunkname = tree.chunkname

    function self:proto_enter(indent)
        self.proto = proto_new(self.proto, indent)
    end

    function self:proto_leave()
        local proto = self.proto
        self.proto = proto.parent
        return proto
    end

    local function to_expr(node)
        return self:expr_emit(node)
    end

    function self:indent_more()
        local proto = self.proto
        proto.indent = proto.indent + 1
    end

    function self:indent_less()
        local proto = self.proto
        proto.indent = proto.indent - 1
    end

    function self:line(line)
        -- FIXME: ignored for the moment
    end

    function self:add_line(line)
        local proto = self.proto
        local indent = string.rep("    ", proto.indent)
        proto.code[#proto.code + 1] = indent .. line
    end

    -- Emit `header`, the indented statements of `body`, then "end" unless
    -- `omit_end` is set.
    function self:add_section(header, body, omit_end)
        self:add_line(header)
        self:indent_more()
        self:list_emit(body)
        self:indent_less()
        if not omit_end then
            self:add_line("end")
        end
    end

    function self:expr_emit(node)
        local rule = ExpressionRule[node.kind]
        if not rule then error("cannot find an expression rule for " .. node.kind) end
        return rule(self, node)
    end

    function self:expr_list(exps)
        return comma_sep_list(exps, to_expr)
    end

    function self:emit(node)
        local rule = StatementRule[node.kind]
        if not rule then error("cannot find a statement rule for " .. node.kind) end
        rule(self, node)
        if node.line then self:line(node.line) end
    end

    function self:list_emit(node_list)
        for i = 1, #node_list do
            self:emit(node_list[i])
        end
    end

    self:emit(tree)

    return self:proto_leave():inline()
end

return generate

--------------------------------------------------------------------------------
/operator.lua:
--------------------------------------------------------------------------------
-- Priorities for each binary operator.
-- (left priority) * 256 + (right priority)
-- modulus is your friend
local binop = {
    ['+']  = 6 * 256 + 6, ['-']  = 6 * 256 + 6, ['*'] = 7 * 256 + 7, ['/'] = 7 * 256 + 7, ['%'] = 7 * 256 + 7,
    ['^']  = 10* 256 + 9, ['..'] = 5 * 256 + 4, -- POW CONCAT (right associative)
    ['=='] = 3 * 256 + 3, ['~='] = 3 * 256 + 3,
    ['<']  = 3 * 256 + 3, ['>='] = 3 * 256 + 3, ['>'] = 3 * 256 + 3, ['<='] = 3 * 256 + 3,
    ['and']= 2 * 256 + 2, ['or'] = 1 * 256 + 1,
    ['**'] = 10* 256 + 10,
    ['^^'] = 12* 256 + 11, -- right associative
}

local unaop = {
    ['#'] = 8,
    ['-'] = 8,
    ['not'] = 8,
    ['`'] = 13, -- highest
}

-- Priority of the unary operator `op`, or nil if `op` is not one.
local function unary_priority(op)
    return unaop[op]
end

-- Pseudo priority of a simple identifier. Should be higher than any
-- others operator's priority.
local ident_priority = 16

-- Truthy (the packed priority value) if `op` is a binary operator, else nil.
local function is_binop(op)
    return binop[op]
end

-- The two decoders below use plain arithmetic, so the module does not depend
-- on a global `bit` library being present.

-- Left priority of the binary operator `op` (high part of the packed value).
local function left_priority(op)
    return math.floor(binop[op] / 256)
end

-- Right priority of the binary operator `op` (low part of the packed value).
local function right_priority(op)
    return binop[op] % 256
end

return {
    is_binop = is_binop,
    left_priority = left_priority,
    right_priority = right_priority,
    unary_priority = unary_priority,
    ident_priority = ident_priority,
}

--------------------------------------------------------------------------------
/parser.lua:
--------------------------------------------------------------------------------
local operator = require("sci-lang.operator")

local LJ_52 = false

-- Tokens that terminate a block.
local EndOfBlock = { TK_else = true, TK_elseif = true, TK_end = true, TK_until = true, TK_eof = true }

local is_algebra_map = {
    BinaryAlgebraExpression = true,
    UnaryAlgebraExpression  = true,
    IndexAlgebraExpression  = true,
}

-- True if `node` is one of the algebra expression kinds.
local function is_algebra(node)
    return is_algebra_map[node.kind]
end

-- Report the syntax error `em` at the current token.
local function err_syntax(ls, em)
    ls:error(ls.token, em)
end

-- Report that `token` was expected at the current position.
local function err_token(ls, token)
    ls:error(ls.token, "'%s' expected", ls.token2str(token))
end

local function checkcond(ls, cond, em)
    if not cond then err_syntax(ls, em) end
end

-- Consume the current token if it is `tok`; tell whether it was consumed.
local function lex_opt(ls, tok)
    if ls.token == tok then
        ls:next()
        return true
    end
    return false
end

-- Require the current token to be `tok` and consume it.
local function lex_check(ls, tok)
    if ls.token ~= tok then err_token(ls, tok) end
    ls:next()
end

-- Require the closing token `what` for the construct opened by `who` at
-- `line`; the error message names the opener when it is on another line.
local function lex_match(ls, what, who, line)
    if not lex_opt(ls, what) then
        if line == ls.linenumber then
            err_token(ls, what)
        else
            local token2str = ls.token2str
            ls:error(ls.token, "%s expected (to close %s at line %d)", token2str(what), token2str(who), line)
        end
    end
end

-- Consume a name token and return its text. Unless LJ_52 is set, a 'goto'
-- token is accepted as a name too.
local function lex_str(ls)
    if ls.token ~= 'TK_name' and (LJ_52 or ls.token ~= 'TK_goto') then
        err_token(ls, 'TK_name')
    end
    local s = ls.tokenval
    ls:next()
    return s
end

local expr_primary, expr, expr_unop, expr_binop, expr_simple
local expr_list, expr_table
local parse_body, parse_block, parse_args

local function var_lookup(ast, ls)
    local name = lex_str(ls)
    return ast:identifier(name)
end

local function expr_field(ast, ls, v)
    ls:next() -- Skip dot or colon.
    local key = lex_str(ls)
    return ast:expr_property(v, key)
end

-- Parse a bracketed expression. With `accept_empty`, "[]" is allowed and
-- yields nil.
local function expr_bracket(ast, ls, accept_empty)
    ls:next() -- Skip '['.
    local v
    if accept_empty and lex_opt(ls, ']') then
        v = nil
    else
        v = expr(ast, ls)
        lex_check(ls, ']')
    end
    return v
end

-- Parse a table constructor.
function expr_table(ast, ls)
    local line = ls.linenumber
    local kvs = {}
    lex_check(ls, '{')
    while ls.token ~= '}' do
        local key
        if ls.token == '[' then
            key = expr_bracket(ast, ls)
            lex_check(ls, '=')
        elseif (ls.token == 'TK_name' or (not LJ_52 and ls.token == 'TK_goto')) and ls:lookahead() == '=' then
            local name = lex_str(ls)
            key = ast:literal(name)
            lex_check(ls, '=')
        end
        local val = expr(ast, ls)
        kvs[#kvs + 1] = { val, key } -- "key" can be nil.
        if not lex_opt(ls, ',') and not lex_opt(ls, ';') then break end
    end
    lex_match(ls, '}', '{', line)
    return ast:expr_table(kvs, line)
end

-- Parse a simple expression: literal, vararg, table constructor, function
-- expression, or anything expr_primary accepts.
function expr_simple(ast, ls)
    local tk, val = ls.token, ls.tokenval
    local e
    if tk == 'TK_number' then
        e = ast:literal(val)
    elseif tk == 'TK_string' then
        e = ast:literal(val)
    elseif tk == 'TK_nil' then
        e = ast:literal(nil)
    elseif tk == 'TK_true' then
        e = ast:literal(true)
    elseif tk == 'TK_false' then
        e = ast:literal(false)
    elseif tk == 'TK_dots' then
        if not ls.fs.varargs then
            err_syntax(ls, "cannot use \"...\" outside a vararg function")
        end
        e = ast:expr_vararg()
    elseif tk == '{' then
        return expr_table(ast, ls)
    elseif tk == 'TK_function' then
        ls:next()
        local args, body, proto = parse_body(ast, ls, ls.linenumber, false)
        return ast:expr_function(args, body, proto)
    else
        return expr_primary(ast, ls)
    end
    ls:next()
    return e
end

-- Parse a comma-separated expression list; the last expression goes through
-- ast:set_expr_last.
function expr_list(ast, ls)
    local exps = { }
    exps[1] = expr(ast, ls)
    while lex_opt(ls, ',') do
        exps[#exps + 1] = expr(ast, ls)
    end
    local n = #exps
    if n > 0 then
        exps[n] = ast:set_expr_last(exps[n])
    end
    return exps
end

-- Parse a prefix unary expression, or a simple expression optionally followed
-- by the postfix '`' operator.
function expr_unop(ast, ls)
    local tk = ls.token
    if tk == 'TK_not' or tk == '-' or tk == '#' then
        local line = ls.linenumber
        ls:next()
        local op = ls.token2str(tk)
        local v = expr_binop(ast, ls, operator.unary_priority(op))
        if is_algebra(v) then
            assert(op == '-', 'not yet implemented')
            return ast:expr_algebra_unop(op, v, line)
        else
            return ast:expr_unop(op, v, line)
        end
    else
        local exp = expr_simple(ast, ls)
        -- Read the line before the '`' is consumed, as the prefix branch does
        -- for its operator.
        local line = ls.linenumber
        if lex_opt(ls, '`') then
            exp = ast:expr_algebra_unop('`', exp, line)
        end
        return exp
    end
end

-- Parse binary expressions with priority higher than the limit.
-- Returns the expression and the operator at which parsing stopped.
function expr_binop(ast, ls, limit)
    local v = expr_unop(ast, ls)
    local op = ls.token2str(ls.token)
    while operator.is_binop(op) and operator.left_priority(op) > limit do
        local line = ls.linenumber
        ls:next()
        local v2, nextop = expr_binop(ast, ls, operator.right_priority(op))
        if is_algebra(v) or is_algebra(v2) then
            v = ast:expr_algebra_binop(op, v, v2, line)
        else
            v = ast:expr_binop(op, v, v2, line)
        end
        op = nextop
    end
    return v, op
end

function expr(ast, ls)
    return expr_binop(ast, ls, 0) -- Priority 0: parse whole expression.
end

-- Parse primary expression.
-- Returns the expression and its kind: 'expr', 'var', 'indexed' or 'call'.
function expr_primary(ast, ls)
    local v, vk
    -- Parse prefix expression.
    if ls.token == '(' then
        local line = ls.linenumber
        ls:next()
        vk, v = 'expr', ast:expr_brackets(expr(ast, ls))
        lex_match(ls, ')', '(', line)
    elseif ls.token == 'TK_name' or (not LJ_52 and ls.token == 'TK_goto') then
        vk, v = 'var', var_lookup(ast, ls)
    else
        err_syntax(ls, "unexpected symbol")
    end
    while true do -- Parse multiple expression suffixes.
        local line = ls.linenumber
        if ls.token == '.' then
            vk, v = 'indexed', expr_field(ast, ls, v)
        elseif ls.token == '[' then
            local key = expr_bracket(ast, ls, true)
            if key then
                vk, v = 'indexed', ast:expr_index(v, key)
            else
                vk, v = 'indexed', ast:expr_algebra_index(v)
            end
        elseif ls.token == ':' then
            ls:next()
            local key = lex_str(ls)
            local args = parse_args(ast, ls)
            vk, v = 'call', ast:expr_method_call(v, key, args, line)
        elseif ls.token == '(' or ls.token == 'TK_string' or ls.token == '{' then
            local args = parse_args(ast, ls)
            vk, v = 'call', ast:expr_function_call(v, args, line)
        else
            break
        end
    end
    return v, vk
end

-- Parse statements ----------------------------------------------------


-- Parse 'return' statement.
local function parse_return(ast, ls, line)
    ls:next() -- Skip 'return'.
    ls.fs.has_return = true
    local exps
    if EndOfBlock[ls.token] or ls.token == ';' then -- Base return.
        exps = { }
    else -- Return with one or more values.
        exps = expr_list(ast, ls)
    end
    return ast:return_stmt(exps, line)
end

-- Parse numeric 'for'.
local function parse_for_num(ast, ls, varname, line)
    -- Header: '=' start ',' limit [',' step] 'do'.
    lex_check(ls, '=')
    local start_expr = expr(ast, ls)
    lex_check(ls, ',')
    local limit_expr = expr(ast, ls)
    -- The step defaults to the literal 1 when the source gives none.
    local step_expr = lex_opt(ls, ',') and expr(ast, ls) or ast:literal(1)
    lex_check(ls, 'TK_do')
    local block = parse_block(ast, ls, line)
    return ast:for_stmt(ast:identifier(varname), start_expr, limit_expr, step_expr,
                        block, line, ls.linenumber)
end

-- Parse 'for' iterator.
-- `indexname` is the first loop variable, already consumed by the caller.
local function parse_for_iter(ast, ls, indexname)
    local names, count = { ast:identifier(indexname) }, 1
    while lex_opt(ls, ',') do
        count = count + 1
        names[count] = ast:identifier(lex_str(ls))
    end
    lex_check(ls, 'TK_in')
    -- The statement's line is the one read right after 'in'.
    local in_line = ls.linenumber
    local iter_exps = expr_list(ast, ls)
    lex_check(ls, 'TK_do')
    local block = parse_block(ast, ls, in_line)
    return ast:for_iter_stmt(names, iter_exps, block, in_line, ls.linenumber)
end

-- Parse 'for' statement.
-- Chooses the numeric or the iterator form from the token that follows the
-- first variable name, then requires the closing 'end'.
local function parse_for(ast, ls, line)
    ls:next()  -- Skip 'for'.
    local first_name = lex_str(ls)  -- Get first variable name.
    local tok = ls.token
    local node
    if tok == '=' then
        node = parse_for_num(ast, ls, first_name, line)
    elseif tok == 'TK_in' or tok == ',' then
        node = parse_for_iter(ast, ls, first_name)
    else
        err_syntax(ls, "'=' or 'in' expected")
    end
    lex_match(ls, 'TK_end', 'TK_for', line)
    return node
end

-- Parse 'repeat' statement.
-- The scope is closed only after the 'until' condition has been parsed.
local function parse_repeat(ast, ls, line)
    ast:fscope_begin()
    ls:next() -- Skip 'repeat'.
    local block = parse_block(ast, ls)
    local until_line = ls.linenumber
    lex_match(ls, 'TK_until', 'TK_repeat', line)
    local test = expr(ast, ls) -- Parse condition.
    ast:fscope_end()
    return ast:repeat_stmt(test, block, line, until_line)
end

-- Parse function argument list.
-- Accepts one of three call-argument forms and returns a list of argument
-- nodes: a parenthesized (possibly empty) expression list, a single table
-- constructor, or a single string literal. Any other token is a syntax error.
function parse_args(ast, ls)
  local line = ls.linenumber
  local args
  if ls.token == '(' then
    -- Outside LJ_52 mode, a '(' that is not on the line recorded in
    -- ls.lastline is rejected as ambiguous.
    if not LJ_52 and line ~= ls.lastline then
      err_syntax(ls, "ambiguous syntax (function call x new statement)")
    end
    ls:next()
    if ls.token ~= ')' then -- Not f().
      args = expr_list(ast, ls)
    else
      args = { }
    end
    lex_match(ls, ')', '(', line)
  elseif ls.token == '{' then
    local a = expr_table(ast, ls)
    args = { a }
  elseif ls.token == 'TK_string' then
    local a = ls.tokenval
    ls:next()
    args = { ast:literal(a) }
  else
    err_syntax(ls, "function arguments expected")
  end
  return args
end

-- Parse the remainder of an assignment whose first target `var` (of kind `vk`)
-- has already been parsed. Targets are accumulated in `vlist`; each ',' leads
-- to a recursive call for the next target, and '=' starts the right-hand side.
-- If any target satisfies is_algebra, an algebra assignment node is built
-- instead of a plain one; that path currently asserts a single target.
local function parse_assignment(ast, ls, vlist, var, vk)
  local line = ls.linenumber
  -- Only plain variables and indexed expressions are valid targets.
  checkcond(ls, vk == 'var' or vk == 'indexed', 'syntax error')
  vlist[#vlist+1] = var
  if lex_opt(ls, ',') then
    local n_var, n_vk = expr_primary(ast, ls)
    return parse_assignment(ast, ls, vlist, n_var, n_vk)
  else -- Parse RHS.
    lex_check(ls, '=')
    local exps = expr_list(ast, ls)
    local algebra = false
    for i=1,#vlist do
      algebra = algebra or is_algebra(vlist[i])
    end
    if algebra then
      assert(#vlist == 1, 'not yet implemented')
      return ast:assignment_algebra_expr(vlist, exps, line)
    else
      return ast:assignment_expr(vlist, exps, line)
    end
  end
end

-- Parse a statement that starts with a primary expression: a call expression
-- becomes an expression statement, anything else is treated as the first
-- target of an assignment.
local function parse_call_assign(ast, ls)
  local var, vk = expr_primary(ast, ls)
  if vk == 'call' then
    return ast:new_statement_expr(var, ls.linenumber)
  else
    local vlist = { }
    return parse_assignment(ast, ls, vlist, var, vk)
  end
end

-- Parse 'local' statement.
-- Handles both "local function name ..." and "local a, b, ... [= explist]".
-- A declaration without '=' gets an empty expression list.
local function parse_local(ast, ls)
  local line = ls.linenumber
  if lex_opt(ls, 'TK_function') then -- Local function declaration.
    local name = lex_str(ls)
    local args, body, proto = parse_body(ast, ls, line, false)
    return ast:local_function_decl(name, args, body, proto)
  else -- Local variable declaration.
    local vl = { }
    repeat -- Collect LHS.
      vl[#vl+1] = lex_str(ls)
    until not lex_opt(ls, ',')
    local exps
    if lex_opt(ls, '=') then -- Optional RHS.
      exps = expr_list(ast, ls)
    else
      exps = { }
    end
    return ast:local_decl(vl, exps, line)
  end
end

-- Parse a 'function' statement: "function a.b.c[:m] <body>". When the name
-- ends with ":m", parse_body is asked to add an implicit 'self' parameter.
local function parse_func(ast, ls, line)
  local needself = false
  ls:next() -- Skip 'function'.
  -- Parse function name.
  local v = var_lookup(ast, ls)
  while ls.token == '.' do -- Multiple dot-separated fields.
    v = expr_field(ast, ls, v)
  end
  if ls.token == ':' then -- Optional colon: the function is defined as a method.
    needself = true
    v = expr_field(ast, ls, v)
  end
  local args, body, proto = parse_body(ast, ls, line, needself)
  return ast:function_decl(v, args, body, proto)
end

-- Parse a 'while cond do ... end' statement. The condition is parsed before
-- the loop scope is opened; `lastline` is the line current when the closing
-- 'end' is reached.
local function parse_while(ast, ls, line)
  ls:next() -- Skip 'while'.
  local cond = expr(ast, ls)
  ast:fscope_begin()
  lex_check(ls, 'TK_do')
  local body = parse_block(ast, ls)
  local lastline = ls.linenumber
  lex_match(ls, 'TK_end', 'TK_while', line)
  ast:fscope_end()
  return ast:while_stmt(cond, body, line, lastline)
end

-- Parse one "if cond then block" or "elseif cond then block" arm. Skips the
-- current token (the 'if'/'elseif' keyword), appends the condition to `tests`
-- and returns the parsed block.
local function parse_then(ast, ls, tests, line)
  ls:next()
  tests[#tests+1] = expr(ast, ls)
  lex_check(ls, 'TK_then')
  return parse_block(ast, ls, line)
end

-- Parse a full 'if' statement. `tests[i]` is the condition guarding
-- `blocks[i]`; `else_branch` stays nil when there is no 'else'.
local function parse_if(ast, ls, line)
  local tests, blocks = { }, { }
  blocks[1] = parse_then(ast, ls, tests, line)
  while ls.token == 'TK_elseif' do
    blocks[#blocks+1] = parse_then(ast, ls, tests, ls.linenumber)
  end
  local else_branch
  if ls.token == 'TK_else' then
    local eline = ls.linenumber
    ls:next() -- Skip 'else'.
    else_branch = parse_block(ast, ls, eline)
  end
  lex_match(ls, 'TK_end', 'TK_if', line)
  return ast:if_stmt(tests, blocks, else_branch, line)
end

-- Parse a label statement "::name::" and return its node. The line passed to
-- ast:label_stmt is read after any trailing labels/semicolons were consumed.
local function parse_label(ast, ls)
  ls:next() -- Skip '::'.
  local name = lex_str(ls)
  lex_check(ls, 'TK_label')
  -- Recursively parse trailing statements: labels and ';' (Lua 5.2 only).
  -- NOTE(review): the node returned by the recursive call is discarded, so a
  -- label that directly follows another one is consumed but does not appear
  -- in this function's result. Confirm whether ast:label_stmt registers the
  -- label as a side effect; otherwise such labels are lost.
  while true do
    if ls.token == 'TK_label' then
      parse_label(ast, ls)
    elseif LJ_52 and ls.token == ';' then
      ls:next()
    else
      break
    end
  end
  return ast:label_stmt(name, ls.linenumber)
end

-- Parse the target of a 'goto'. The caller has already skipped the 'goto'
-- token, so the current token is expected to be the label name.
local function parse_goto(ast, ls)
  local line = ls.linenumber
  local name = lex_str(ls)
  return ast:goto_stmt(name, line)
end

-- Parse a statement. Returns the statement itself and a boolean that tells if it
-- must be the last one in a chunk.
-- Dispatches on the current token. 'return' always terminates the block;
-- 'break' terminates it unless LJ_52 is set. Anything that is not recognised
-- as a keyword statement is parsed as a call or an assignment.
local function parse_stmt(ast, ls)
  local line = ls.linenumber
  local stmt
  if ls.token == 'TK_if' then
    stmt = parse_if(ast, ls, line)
  elseif ls.token == 'TK_while' then
    stmt = parse_while(ast, ls, line)
  elseif ls.token == 'TK_do' then
    ls:next()
    local body = parse_block(ast, ls)
    local lastline = ls.linenumber
    lex_match(ls, 'TK_end', 'TK_do', line)
    stmt = ast:do_stmt(body, line, lastline)
  elseif ls.token == 'TK_for' then
    stmt = parse_for(ast, ls, line)
  elseif ls.token == 'TK_repeat' then
    stmt = parse_repeat(ast, ls, line)
  elseif ls.token == 'TK_function' then
    stmt = parse_func(ast, ls, line)
  elseif ls.token == 'TK_local' then
    ls:next()
    stmt = parse_local(ast, ls, line)
  elseif ls.token == 'TK_return' then
    stmt = parse_return(ast, ls, line)
    return stmt, true -- Must be last.
  elseif ls.token == 'TK_break' then
    ls:next()
    stmt = ast:break_stmt(line)
    return stmt, not LJ_52 -- Must be last in Lua 5.1.
  elseif LJ_52 and ls.token == ';' then
    ls:next()
    return parse_stmt(ast, ls)
  elseif ls.token == 'TK_label' then
    stmt = parse_label(ast, ls)
  elseif ls.token == 'TK_goto' then
    -- Without LJ_52, 'goto' only starts a goto statement when a name follows;
    -- otherwise 'stmt' stays nil and the token is handled by the fallback.
    if LJ_52 or ls:lookahead() == 'TK_name' then
      ls:next()
      stmt = parse_goto(ast, ls)
    end
  end
  -- If here 'stmt' is "nil" then ls.token didn't match any of the previous rules.
  -- Fall back to call/assign rule.
  if not stmt then
    stmt = parse_call_assign(ast, ls)
  end
  return stmt, false
end

-- Parse a parenthesized parameter list and return the list of declared
-- parameter nodes. When `needself` is true an implicit "self" is declared
-- first. A "..." must be the last parameter; it marks the current function
-- state (ls.fs) as vararg. Without LJ_52, a 'goto' token is accepted as a
-- parameter name.
local function parse_params(ast, ls, needself)
  lex_check(ls, "(")
  local args = { }
  if needself then
    args[1] = ast:var_declare("self")
  end
  if ls.token ~= ")" then
    repeat
      if ls.token == 'TK_name' or (not LJ_52 and ls.token == 'TK_goto') then
        local name = lex_str(ls)
        args[#args+1] = ast:var_declare(name)
      elseif ls.token == 'TK_dots' then
        ls:next()
        ls.fs.varargs = true
        args[#args + 1] = ast:expr_vararg()
        break
      else
        -- The message previously started with " or", i.e. without saying
        -- what was expected besides "...".
        err_syntax(ls, "<name> or \"...\" expected")
      end
    until not lex_opt(ls, ',')
  end
  lex_check(ls, ")")
  return args
end

-- Create a fresh function-state record. `ls` is accepted for call-site
-- symmetry but is not used.
local function new_proto(ls, varargs)
  return { varargs = varargs }
end

-- Parse statements until a block-ending token is reached or a statement
-- reports that it must be the last one. An optional ';' is skipped after
-- each statement. Returns the statement list, the line at which parsing
-- started and the line at which it stopped.
local function parse_block_stmts(ast, ls)
  local firstline = ls.linenumber
  local stmt, islast = nil, false
  local body = { }
  while not islast and not EndOfBlock[ls.token] do
    stmt, islast = parse_stmt(ast, ls)
    body[#body + 1] = stmt
    lex_opt(ls, ';')
  end
  return body, firstline, ls.linenumber
end

-- Parse the top-level chunk. The chunk node is created with 0 as its first
-- line, so the first line reported by parse_block_stmts is ignored.
local function parse_chunk(ast, ls)
  local body, _, lastline = parse_block_stmts(ast, ls)
  return ast:chunk(body, ls.chunkname, 0, lastline)
end

-- Parse body of a function.
-- Parses the parameter list and the block of a function whose 'function'
-- keyword (and name, if any) were already consumed. A fresh function state is
-- installed in ls.fs for the duration and the previous one is restored before
-- returning. Returns the parameter nodes, the body and the function state
-- (which carries firstline, lastline and varargs).
function parse_body(ast, ls, line, needself)
  local pfs = ls.fs
  ls.fs = new_proto(ls, false)
  ast:fscope_begin()
  ls.fs.firstline = line
  local args = parse_params(ast, ls, needself)
  local body = parse_block(ast, ls)
  ast:fscope_end()
  local proto = ls.fs
  -- lex_match is only invoked on a mismatch, i.e. purely to report the
  -- missing 'end'; the 'end' token itself is consumed by ls:next() below,
  -- after its line number has been recorded.
  if ls.token ~= 'TK_end' then
    lex_match(ls, 'TK_end', 'TK_function', line)
  end
  ls.fs.lastline = ls.linenumber
  ls:next()
  ls.fs = pfs
  return args, body, proto
end

-- Parse a block in its own scope. The returned statement list additionally
-- carries `firstline` (as given by the caller, possibly nil) and `lastline`
-- fields.
function parse_block(ast, ls, firstline)
  ast:fscope_begin()
  local body = parse_block_stmts(ast, ls)
  body.firstline, body.lastline = firstline, ls.linenumber
  ast:fscope_end()
  return body
end

-- Parser entry point: reads the first token, parses the whole chunk as a
-- vararg function and requires that the input ends right after it.
local function parse(ast, ls)
  ls:next()
  ls.fs = new_proto(ls, true)
  ast:fscope_begin()
  local chunk = parse_chunk(ast, ls)
  ast:fscope_end()
  if ls.token ~= 'TK_eof' then
    err_token(ls, 'TK_eof')
  end
  return chunk
end

return parse

--------------------------------------------------------------------------------
/reader.lua:
--------------------------------------------------------------------------------
local strsub = string.sub

-- Returns a reader function that hands out `src` piece by piece and returns
-- nil once the string is exhausted. string.sub's end index is inclusive, so
-- each piece holds up to 4096 - 32 + 1 bytes.
local function new_string_reader(src)
  local pos = 1
  local function reader()
    local chunk = strsub(src, pos, pos + 4096 - 32)
    pos = pos + #chunk
    return #chunk > 0 and chunk or nil
  end
  return reader
end

-- Returns a reader function that hands out the content of `filename` in
-- pieces of up to 4096 - 32 bytes, or reads from standard input when no
-- filename is given. Raises an error if the file cannot be opened.
-- A file opened here is closed as soon as a read returns nil (end of input or
-- read failure); later calls then return nil without touching the file.
-- Standard input is never closed.
local function new_file_reader(filename)
  local f
  local owns_file = false
  if filename then
    f = assert(io.open(filename, 'r'), "cannot open file " .. filename)
    owns_file = true
  else
    f = io.stdin
  end
  local function reader()
    if f == nil then -- Already closed after end of input.
      return nil
    end
    -- Keep the extra results of a failed read (message, code) for the caller.
    local chunk, err, code = f:read(4096 - 32)
    if chunk == nil and owns_file then
      f:close()
      f = nil
    end
    return chunk, err, code
  end
  return reader
end

return {
  string = new_string_reader,
  file = new_file_reader,
}

--------------------------------------------------------------------------------
/transform.lua:
--------------------------------------------------------------------------------
local lua_ast = require('sci-lang.lua-ast')

-- Append every extra argument to the list `body`. Because the length
-- operator is used on the packed arguments, a lone nil argument appends
-- nothing.
local function add_body(body, ...)
  local arg = { ... }
  for i=1,#arg do
    body[#body + 1] = arg[i]
  end
end

-- Build the node for indexing the '_p' property of `var` with the loop
-- variable '__i' (presumably the generated code reads `var._p[__i]`).
local function aexpr_index(ast, var, line)
  local p_idx = ast:expr_property(var, '_p', line)
  return ast:expr_index(p_idx, ast:identifier('__i'), line)
end

-- Build a numeric 'for' over '__i' from 0 to `lhs._n - 1` whose body assigns
-- `rhs` to the '__i'-th element of `lhs`. No step node is supplied (nil).
local function aexpr_loop1(ast, lhs, rhs)
  return ast:for_stmt(
    ast:identifier('__i', 1),
    ast:literal(0, 1),
    ast:expr_binop('-', ast:expr_property(lhs, '_n', 1), ast:literal(1, 1), 1),
    nil,
    { ast:assignment_expr({ aexpr_index(ast, lhs, 1) }, { rhs }, 1) },
    1, 1)
end

-- Build a call to the helper named '__dim_<what>' with `arrays` as arguments.
local function aexpr_dim(ast, what, arrays)
  return ast:expr_function_call(ast:identifier('__dim_'..what, 1), arrays, 1)
end

-- Register `node` as the next parameter of the generated function: a new
-- formal '__x<n>' is appended to `fargs` and the matching actual argument to
-- `fvals` (the indexed object for an index-algebra node, the node itself for
-- identifiers and literals). Returns the formal parameter node.
local function aexpr_terminal(ast, node, fargs, fvals)
  assert(#fargs == #fvals)
  local kind, ivar = node.kind, #fargs + 1
  fargs[ivar] = ast:identifier('__x'..ivar, 1)
  if kind == 'IndexAlgebraExpression' then
    fvals[ivar] = node.object
  elseif kind == 'Identifier' or kind == 'Literal' then
    fvals[ivar] = node
  end
  return fargs[ivar]
end

-- Forward declaration: aexpr_set and the helpers below are mutually recursive.
local aexpr_set

-- Translate an algebra expression into an expression over single elements
-- (indexed with '__i'). Arrays taking part in the element-wise expression are
-- collected in `arrays`. A '**' or '^^' sub-expression cannot be evaluated
-- element by element, so it is first stored into a new temporary '__t<n>'
-- (statements are appended to `fbody`) and the temporary is indexed instead.
local function aexpr_linear_access(ast, node, fbody, fargs, fvals, temps, arrays)
  assert(type(temps) == 'table')
  local kind, operator = node.kind, node.operator
  if kind == 'IndexAlgebraExpression' then
    local var = aexpr_terminal(ast, node, fargs, fvals)
    arrays[#arrays + 1] = var
    return aexpr_index(ast, var, 1)
  elseif kind == 'Identifier' or kind == 'Literal' then
    return aexpr_terminal(ast, node, fargs, fvals)
  elseif kind == 'UnaryAlgebraExpression' then
    return ast:expr_unop(node.operator, aexpr_linear_access(ast, node.argument, fbody, fargs, fvals, temps, arrays), node.line)
  elseif kind == 'BinaryAlgebraExpression' then
    if operator == '**' or operator == '^^' then
      local ivar = #temps + 1
      temps[ivar] = ast:identifier('__t'..ivar, 1)
      arrays[#arrays + 1] = temps[ivar]
      aexpr_set(ast, node, temps[ivar], ast:identifier('__stack_array', 1), fbody, fargs, fvals, temps)
      return aexpr_index(ast, temps[ivar], 1)
    else
      local left = aexpr_linear_access(ast, node.left, fbody, fargs, fvals, temps, arrays)
      local right = aexpr_linear_access(ast, node.right, fbody, fargs, fvals, temps, arrays)
      return ast:expr_binop(node.operator, left, right, node.line)
    end
  end
  error('internal: unreachable')
end

-- Build the two statements that store an element-wise expression into `out`.
-- The first either declares `out` through a call to `out_kind` (when given)
-- or just invokes the matching '__dim_elw_<n>' helper on an existing `out`;
-- the second is the element loop. Temporaries needed by sub-expressions are
-- appended to `fbody` while the expression is translated.
local function aexpr_elw_set(ast, node, out, out_kind, fbody, fargs, fvals, temps)
  local arrays = { }
  local access = aexpr_linear_access(ast, node, fbody, fargs, fvals, temps, arrays)
  local pre
  if out_kind then
    local __dim = aexpr_dim(ast, 'elw_'..(#arrays), arrays)
    pre = ast:local_decl({ out.name }, { ast:expr_function_call(out_kind, { arrays[1], __dim }, 1) }, 1)
  else
    local __dim = aexpr_dim(ast, 'elw_'..(#arrays + 1), { out, unpack(arrays) })
    pre = ast:new_statement_expr(__dim, 1)
  end
  local elw = aexpr_loop1(ast, out, access)
  return pre, elw
end

-- Reduce an operand of '**' / '^^' to a single variable. A leading '`'
-- operator is stripped and reported through the second result, a boolean
-- literal node. Terminals become function parameters; any other expression
-- is first stored into a new temporary '__t<n>'.
local function aexpr_singlify(ast, node, fbody, fargs, fvals, temps)
  local kind, transpose = node.kind, false
  if kind == 'UnaryAlgebraExpression' and node.operator == '`' then
    transpose = true
    node = node.argument
    kind = node.kind
  end
  transpose = ast:literal(transpose, 1)
  if kind == 'IndexAlgebraExpression' or kind == 'Identifier' or kind == 'Literal' then
    return aexpr_terminal(ast, node, fargs, fvals), transpose
  else
    local ivar = #temps + 1
    temps[ivar] = ast:identifier('__t'..ivar, 1)
    aexpr_set(ast, node, temps[ivar], ast:identifier('__stack_array', 1), fbody, fargs, fvals, temps)
    return temps[ivar], transpose
  end
end

-- Build the two statements for `out = left ** right`: the declaration of
-- `out` (or the '__dim_mul_3' call on an existing `out`) followed by the call
-- to '__mul'.
local function aexpr_mul_set(ast, out, out_kind, left, right, left_tr, right_tr)
  local __mul = ast:identifier('__mul', 1)
  local pre
  if out_kind then
    local __dim = aexpr_dim(ast, 'mul_2', { left, right, left_tr, right_tr })
    pre = ast:local_decl({ out.name }, { ast:expr_function_call(out_kind, { left, __dim }, 1) }, 1)
  else
    local __dim = aexpr_dim(ast, 'mul_3', { out, left, right, left_tr, right_tr })
    pre = ast:new_statement_expr(__dim, 1)
  end
  local mul = ast:new_statement_expr(ast:expr_function_call(__mul, { out, left, right, left_tr, right_tr }, 1), 1)
  return pre, mul
end

-- Build the two statements for `out = left ^^ right`: the declaration of
-- `out` (or the '__dim_pow_2' call on an existing `out`) followed by the call
-- to '__pow'.
local function aexpr_pow_set(ast, out, out_kind, left, right)
  local __pow = ast:identifier('__pow', 1)
  local pre
  if out_kind then
    local __dim = aexpr_dim(ast, 'pow_1', { left })
    pre = ast:local_decl({ out.name }, { ast:expr_function_call(out_kind, { left, __dim }, 1) }, 1)
  else
    local __dim = aexpr_dim(ast, 'pow_2', { out, left })
    pre = ast:new_statement_expr(__dim, 1)
  end
  local pow = ast:new_statement_expr(ast:expr_function_call(__pow, { out, left, right }, 1), 1)
  return pre, pow
end

-- Append to `fbody` the statements that evaluate `node` into `out`. `out_kind`
-- is the allocator to call when `out` has to be declared, or nil when `out`
-- already exists. The helper calls are evaluated before add_body runs, so
-- statements for nested temporaries land in `fbody` ahead of the statements
-- that use them.
aexpr_set = function(ast, node, out, out_kind, fbody, fargs, fvals, temps)
  local kind, operator = node.kind, node.operator
  if kind == 'BinaryAlgebraExpression' and (operator == '**' or operator == '^^') then
    local left, left_tr = aexpr_singlify(ast, node.left, fbody, fargs, fvals, temps)
    local right, right_tr = aexpr_singlify(ast, node.right, fbody, fargs, fvals, temps)
    if operator == '**' then
      add_body(fbody, aexpr_mul_set(ast, out, out_kind, left, right, left_tr, right_tr))
    else
      add_body(fbody, aexpr_pow_set(ast, out, out_kind, left, right))
    end
  else
    add_body(fbody, aexpr_elw_set(ast, node, out, out_kind, fbody, fargs, fvals, temps))
  end
end

-- Module-wide counter used to give every generated function a unique name,
-- and the function-state record shared by all of them.
local expr_count = 0
local proto = { firstline = 1, lastline = 1 }

-- Append a call to '__stack_clear' when at least one temporary was created.
local function aexpr_clear(ast, temps, fbody)
  if #temps > 0 then
    add_body(fbody, ast:new_statement_expr(ast:expr_function_call(ast:identifier('__stack_clear', 1), { }, 1), 1))
  end
end

-- Turn one algebra expression into a local function '__aexpr_<n>', queued in
-- ast.pre, and return the call expression that replaces the original node.
-- The result variable inside the function is always '__r1'. `return_stmt`
-- may be nil, in which case the generated function returns nothing.
local function aexpr_root(ast, fargs, fvals, set_node, out_kind, return_stmt)
  expr_count = expr_count + 1

  local __r1 = ast:identifier('__r1', 1)
  local fbody, temps = { }, { }

  aexpr_set(ast, set_node, __r1, out_kind, fbody, fargs, fvals, temps)
  aexpr_clear(ast, temps, fbody)
  add_body(fbody, return_stmt)
  fbody.lastline = 1

  ast.pre[#ast.pre + 1] = ast:local_function_decl('__aexpr_'..expr_count, fargs, fbody, proto)
  return ast:expr_function_call(ast:identifier('__aexpr_'..expr_count, 1), fvals, 1)
end

-- Algebra expression used as a value: the generated function allocates the
-- result with '__array_alloc' and returns it.
local function aexpr_new(ast, node)
  local __r1 = ast:identifier('__r1', 1)
  return aexpr_root(ast, { }, { }, node, ast:identifier('__array_alloc', 1), ast:return_stmt({ __r1 }, 1))
end

-- Algebra assignment: the object of the first (and only supported) target is
-- passed in as '__r1' and written in place; nothing is returned.
local function aexpr_assign(ast, node)
  local __r1 = ast:identifier('__r1', 1)
  return aexpr_root(ast, { __r1 }, { node.left[1].object }, node.right[1], nil, nil)
end

-- Node kinds that are rewritten, with the function that rewrites them.
local transform_map = {
  IndexAlgebraExpression = aexpr_new,
  UnaryAlgebraExpression = aexpr_new,
  BinaryAlgebraExpression = aexpr_new,
  AssignmentAlgebraExpression = aexpr_assign,
}

-- Recursively copy `node`, replacing every table whose `kind` is listed in
-- transform_map by the result of its rewrite function. Tables without a
-- matching `kind` are copied field by field; non-table values are returned
-- unchanged. The input tree is not modified.
local function transform(ast, node)
  if type(node) == 'table' then
    local transform_kind = transform_map[node.kind] -- nil when the kind is not listed.
    if transform_kind then -- To be transformed nodes.
      return transform_kind(ast, node)
    else -- Not to be transformed nodes.
      local o = { }
      for k,v in pairs(node) do
        o[k] = transform(ast, v)
      end
      return o
    end
  end
  return node -- Not nodes.
end

-- Build a single 'local' declaration binding '__<name>' to the property
-- `<name>` of `from`, for every name listed in `what`.
local function localize(ast, what, from, line)
  local lhs, rhs = { }, { }
  for i,k in ipairs(what) do
    lhs[i] = '__'..k
    rhs[i] = ast:expr_property(from, k, line)
  end
  return ast:local_decl(lhs, rhs, line)
end

-- Build the prelude statements: '__alg' is bound to the '__' property of
-- require('sci.alg'), then its helpers are bound to '__'-prefixed locals.
-- Only 'dim_elw_1' .. 'dim_elw_10' are localized.
local function pre_init(ast)
  local dim_elw_x = { }
  for i=1,10 do dim_elw_x[i] = 'dim_elw_'..i end
  local __alg = ast:identifier('__alg', 1)
  return {
    ast:local_decl(
      { __alg.name },
      { ast:expr_property(
        ast:expr_function_call(ast:identifier('require', 1), { ast:literal('sci.alg', 1) }, 1),
        '__',
        1), },
      1),
    localize(ast, { 'mul', 'pow', 'dim_mul_2', 'dim_mul_3', 'dim_pow_1', 'dim_pow_2', 'stack_array', 'stack_clear', 'array_alloc' }, __alg, 1),
    localize(ast, dim_elw_x, __alg, 1)
  }
end

-- Transform a parsed tree: algebra nodes are replaced by calls to generated
-- functions, and the prelude plus all generated functions are inserted, in
-- order, at the start of the returned tree's `body`.
local function root(tree)
  local tast = lua_ast.New()
  tast:fscope_begin()
  tast.pre = pre_init(tast)
  local valid_tree = transform(tast, tree)
  for i=1,#tast.pre do
    table.insert(valid_tree.body, i, tast.pre[i])
  end
  tast:fscope_end()
  return valid_tree
end

return {
  root = root,
}

--------------------------------------------------------------------------------
/util.lua:
--------------------------------------------------------------------------------
local exports = { }

-- Render `node` as indented, JSON-like text for debugging.
--   nil becomes "null", strings are wrapped in double quotes (no escaping),
--   numbers are returned as numbers, every other non-table value goes
--   through tostring.
--   A table with no sequence part is rendered as "{ key: value, ... }".
--   Any other table (including the empty one) is rendered as "[ ... ]": the
--   sequence elements 1..#node come first, in order, followed by any
--   remaining entries as "key: value".
-- `level` is the nesting depth used for indentation and defaults to 1.
-- There is no cycle detection: a table that contains itself recurses until
-- the stack overflows.
local function dump(node, level)
  if not level then level = 1 end
  local kind = type(node)
  if kind == 'nil' then
    return "null"
  end
  if kind == "string" then
    return '"'..node..'"'
  end
  if kind == "number" then
    return node
  end
  if kind ~= "table" then
    -- boolean, cdata, function, userdata, thread.
    return tostring(node)
  end

  local dent = string.rep(" ", level)
  local closing = "\n"..string.rep(" ", level - 1)
  local count = #node
  local entries = { }

  local function put_pair(k, data)
    entries[#entries + 1] = "\n"..dent..dump(k)..': '..dump(data, level + 1)
  end

  if count == 0 and next(node) ~= nil then
    for k,data in pairs(node) do
      put_pair(k, data)
    end
    return "{"..table.concat(entries, ",")..closing.."}"
  end

  for i=1,count do
    entries[i] = "\n"..dent..dump(node[i], level + 1)
  end
  -- Entries outside the sequence (e.g. the firstline/lastline fields attached
  -- to statement lists) keep their key, so they stay distinguishable from
  -- the elements.
  for k,data in pairs(node) do
    local in_sequence = type(k) == "number" and k >= 1 and k <= count and k % 1 == 0
    if not in_sequence then
      put_pair(k, data)
    end
  end
  return "["..table.concat(entries, ",")..closing.."]"
end

exports.dump = dump

return exports

--------------------------------------------------------------------------------