├── LICENSE
├── README.md
├── __bin
    └── scilua.lua
├── __meta.lua
├── ast-boolean-const-eval.lua
├── ast-const-eval.lua
├── ast-validate.lua
├── bcread.lua
├── bcsave.lua
├── bytecode.lua
├── compile.lua
├── generator.lua
├── lexer.lua
├── lua-ast.lua
├── luacode-generator.lua
├── operator.lua
├── parser.lua
├── reader.lua
├── transform.lua
└── util.lua


/LICENSE:
--------------------------------------------------------------------------------
 1 | ===============================================================================
 2 | 
 3 | Sci-Lang: Syntax extensions to LuaJIT for scientific computing.
 4 | 
 5 | Copyright (C) 2015-2016 Stefano Peluchetti. All rights reserved.
 6 | 
 7 | Permission is hereby granted, free of charge, to any person obtaining a copy
 8 | of this software and associated documentation files (the "Software"), to deal
 9 | in the Software without restriction, including without limitation the rights
10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the Software is
12 | furnished to do so, subject to the following conditions:
13 | 
14 | The above copyright notice and this permission notice shall be included in all
15 | copies or substantial portions of the Software.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | SOFTWARE.
24 | 
25 | [ MIT license: http://opensource.org/licenses/MIT ]
26 | 
27 | ===============================================================================
28 | 
29 | LuaJIT Language Toolkit, a toolkit for language implementations.
30 | 
31 | Copyright (C) 2013-2014 Francesco Abbate. All rights reserved.
32 | 
33 | [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
34 | 
35 | Based on Nyanga's language implementation of Richard Hundt. Copyright
36 | license of Nyanga's original work:
37 | 
38 | ===============================================================================
39 | 
40 | Nyanga -- Modifiable OO Lua Dialect. http://github.com/richardhundt/nyanga
41 | 
42 | Copyright (C) 2013-2014 Richard Hundt and contributors. All rights reserved.
43 | 
44 | Permission is hereby granted, free of charge, to any person obtaining a copy
45 | of this software and associated documentation files (the "Software"), to deal
46 | in the Software without restriction, including without limitation the rights
47 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
48 | copies of the Software, and to permit persons to whom the Software is
49 | furnished to do so, subject to the following conditions:
50 | 
51 | The above copyright notice and this permission notice shall be included in
52 | all copies or substantial portions of the Software.
53 | 
54 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
55 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
56 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
57 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
58 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
59 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
60 | THE SOFTWARE.
61 | 
62 | [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
63 | 
64 | ===============================================================================
65 | 
66 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | SciLua-Lang: Syntax extensions to LuaJIT for scientific computing
 2 | =================================================================
 3 | 
 4 | Based on the [LuaJIT Language Toolkit](https://github.com/franko/luajit-lang-toolkit), this executable introduces extensions to the LuaJIT syntax for algebra operations.
 5 | 
 6 | ## Features
 7 | 
 8 | - algebra expressions constructed via empty bracket `[]` indexing
 9 | - element-wise operations via plain Lua operators (`+-*/^%`)
10 | - matrix multiplication via `**`
11 | - matrix exponentiation via `^^`
12 | - transposition via `` ` ``
13 | - efficient implementation minimizes required allocations and loops
14 | - support for assignments
15 | 
16 | ```lua
17 | -- Replicate rand_mat_stat from Julia's benchmark suite:
18 | local function randmatstat(t)
19 |   local n = 5
20 |   local v, w = alg.vec(t), alg.vec(t)
21 |   for i=1,t do
22 |       local a, b, c, d = randn(n, n), randn(n, n), randn(n, n), randn(n, n)
23 |       local P = alg.join(a..b..c..d)
24 |       local Q = alg.join(a..b, c..d)
25 |       v[i] = alg.trace((P[]`**P[])^^4) -- Matrix transpose, product and power.
26 |       w[i] = alg.trace((Q[]`**Q[])^^4) -- Matrix transpose, product and power.
27 |   end
28 |   return sqrt(stat.var(v))/stat.mean(v), sqrt(stat.var(w))/stat.mean(w)
29 | end
30 | ```
31 | 
32 | ## Install
33 | 
34 | This module is included in the [ULua](http://ulua.io) distribution; to install it, use:
35 | ```
36 | upkg add sci-lang
37 | ```
38 | 
39 | Alternatively, manually install this module making sure that all dependencies listed in the `require` section of [`__meta.lua`](__meta.lua) are installed as well (dependencies starting with `clib_` are standard C dynamic libraries).
40 | 
41 | ## Documentation
42 | 
43 | Refer to the [official documentation](http://scilua.org).


--------------------------------------------------------------------------------
/__bin/scilua.lua:
--------------------------------------------------------------------------------
 1 | 
 2 | -- Set JIT default optimizations for sci.
 3 | jit.opt.start('callunroll=10', 'loopunroll=30')
 4 | 
 5 | local function usage()
 6 |   io.stderr:write[[
 7 | LuaJIT Language Toolkit usage: luajit [options]... [script [args]...].
 8 | 
 9 | Available options are:
10 |   -b ...    Save or list bytecode.
11 |   -c ...    Generate Lua code and run.
12 |             If followed by the "v" option the generated Lua code
13 |             will be printed.
14 | ]]
15 |   os.exit(1)
16 | end
17 | 
--- Unwrap a `success, result` pair (as returned e.g. by `compile.file`).
-- @param success  truthy when the operation succeeded
-- @param result   the payload on success, or the error value on failure
-- @return `result` when `success` is truthy; otherwise the error is written
--         to stderr and the process exits with status 1 (never returns).
local function check(success, result)
    if success then
        return result
    end
    -- tostring() keeps the report working when the error value is nil or a
    -- non-string object; plain concatenation would raise here and hide it.
    io.stderr:write(tostring(result), "\n")
    os.exit(1)
end
26 | 
-- Command-line driver: parse the options, compile the script and run it.
local filename

local args = {...}
local opt = {}
-- Positional arguments after the script name; forwarded to the script as `...`.
local script_args = {}
local k = 1
while args[k] do
    local a = args[k]
    if string.sub(a, 1, 1) == "-" then
        local flag = string.sub(a, 2, 2)
        if flag == "b" then
            -- Hand everything from "-b" onwards to bcsave. Sub-options glued
            -- to "-b" (e.g. "-bl") are re-emitted as their own "-l" argument.
            -- Arguments that preceded "-b" are dropped rather than overwritten.
            local bcargs = {}
            if #a > 2 then
                bcargs[1] = "-" .. string.sub(a, 3)
            end
            for i = k + 1, #args do
                bcargs[#bcargs + 1] = args[i]
            end
            require("sci-lang.bcsave").start(unpack(bcargs))
            os.exit(0)
        elseif flag == "c" then
            opt.code = true
            local copt = string.sub(a, 3, 3)
            if copt == "v" then
                opt.debug = true
            elseif copt ~= "" then
                print("Invalid Lua code option: ", copt)
                usage()
            end
        elseif flag == "v" then
            opt.debug = true
        else
            print("Invalid option: ", a)
            usage()
        end
    elseif filename == nil then
        -- The first positional argument is the script to run.
        filename = a
    else
        -- Later positional arguments belong to the script; they must not
        -- replace the script name.
        script_args[#script_args + 1] = a
    end
    k = k + 1
end

if not filename then usage() end

local compile = require("sci-lang.compile")

-- Compile the given file; the result is a string loadable by loadstring().
local luacode = check(compile.file(filename, opt))
if opt.debug then
    print(luacode)
    print('\n\nOutput:')
end
local fn = assert(loadstring(luacode))
fn(unpack(script_args))
78 | 
79 | 


--------------------------------------------------------------------------------
/__meta.lua:
--------------------------------------------------------------------------------
 1 | 
 2 | return {
 3 |   name = "sci-lang",
 4 |   version = "1.0.beta10",
 5 |   require = {
 6 |     luajit = "2.0",
 7 |     sci = "1.0.beta10",
 8 |   },
 9 |   homepage = "https://github.com/stepelu/lua-sci-lang",
10 |   license  = "MIT/X11",
11 |   description = "Syntax extensions to LuaJIT for scientific computing",
12 | }


--------------------------------------------------------------------------------
/ast-boolean-const-eval.lua:
--------------------------------------------------------------------------------
 1 | local BoolConstRule = { }
 2 | 
 3 | -- A function that returns a boolean constant if an AST node evaluates to a
 4 | -- boolean constant, or "nil" otherwise.
 5 | -- The implementation of the function is given below.
 6 | local const_eval
 7 | 
 8 | local function dirop_compute(o, a, b)
 9 |    if o == 'and' then return a and b
10 |    elseif o == 'or' then return a or b
11 |    end
12 | end
13 | 
--- Rule for Literal nodes: a boolean literal is its own constant value.
-- @param node  node whose `value` field is inspected
-- @return `node.value` when it is a boolean; nothing otherwise
function BoolConstRule.Literal(node)
    local value = node.value
    if type(value) ~= 'boolean' then
        return
    end
    return value
end
18 | 
--- Rule for BinaryExpression nodes: fold the operator when both operands are
-- boolean constants.
-- NOTE(review): ast-validate lists 'and'/'or' under LogicalExpression rather
-- than BinaryExpression -- confirm which node kind actually reaches this rule.
-- @param node  node with `operator`, `left` and `right` fields
-- @return the folded value, or nothing when either side is not constant
function BoolConstRule.BinaryExpression(node)
    local op = node.operator
    local lhs = const_eval(node.left)
    if lhs == nil then
        return
    end
    -- The right operand is evaluated only once the left one is known constant.
    local rhs = const_eval(node.right)
    if rhs == nil then
        return
    end
    return dirop_compute(op, lhs, rhs)
end
29 | 
--- Rule for UnaryExpression nodes: fold `not <constant>`.
-- @param node  node with `operator` and `argument` fields
-- @return the negated constant; nothing for other operators or when the
--         argument is not a boolean constant
function BoolConstRule.UnaryExpression(node)
    if node.operator ~= 'not' then
        return
    end
    local operand = const_eval(node.argument)
    if operand == nil then
        return
    end
    return not operand
end
37 | 
--- Evaluate `node` with the rule registered for its kind in BoolConstRule.
-- @param node  AST node; `node.kind` selects the rule
-- @return whatever the rule returns; nothing when no rule exists for the kind
function const_eval(node)
    local handler = BoolConstRule[node.kind]
    if not handler then
        return
    end
    return handler(node)
end
44 | 
45 | return const_eval
46 | 


--------------------------------------------------------------------------------
/ast-const-eval.lua:
--------------------------------------------------------------------------------
 1 | local ConstRule = { }
 2 | 
 3 | -- A function that returns a numeric constant if an AST node evaluates to an
 4 | -- arithmetic constant, or "nil" otherwise.
 5 | -- The implementation of the function is given below.
 6 | local const_eval
 7 | 
 8 | local function dirop_compute(o, a, b)
 9 |    if     o == '+' then return a + b
10 |    elseif o == '-' then return a - b
11 |    elseif o == '*' then return a * b
12 |    elseif o == '/' then return (a ~= 0 or b ~= 0) and (a / b) or nil
13 |    elseif o == '%' then return a % b
14 |    elseif o == '^' then return a ^ b
15 |    end
16 | end
17 | 
--- Rule for Literal nodes: a numeric literal is its own constant value.
-- @param node  node whose `value` field is inspected
-- @return `node.value` when it is a number; nothing otherwise
function ConstRule.Literal(node)
    local value = node.value
    if type(value) ~= 'number' then
        return
    end
    return value
end
22 | 
--- Rule for BinaryExpression nodes: fold the operator when both operands
-- evaluate to numeric constants.
-- @param node  node with `operator`, `left` and `right` fields
-- @return the result of dirop_compute, or nothing when either operand is not
--         constant
function ConstRule.BinaryExpression(node)
    local op = node.operator
    local lhs = const_eval(node.left)
    if not lhs then
        return
    end
    -- The right operand is evaluated only once the left one is known constant.
    local rhs = const_eval(node.right)
    if not rhs then
        return
    end
    return dirop_compute(op, lhs, rhs)
end
33 | 
--- Rule for UnaryExpression nodes: fold unary minus applied to a constant.
-- @param node  node with `operator` and `argument` fields
-- @return the negated constant; nothing for other operators or when the
--         argument is not constant
function ConstRule.UnaryExpression(node)
    if node.operator ~= '-' then
        return
    end
    local operand = const_eval(node.argument)
    if not operand then
        return
    end
    return -operand
end
41 | 
--- Evaluate `node` with the rule registered for its kind in ConstRule.
-- @param node  AST node; `node.kind` selects the rule
-- @return whatever the rule returns; nothing when no rule exists for the kind
function const_eval(node)
    local handler = ConstRule[node.kind]
    if not handler then
        return
    end
    return handler(node)
end
48 | 
49 | return const_eval
50 | 


--------------------------------------------------------------------------------
/ast-validate.lua:
--------------------------------------------------------------------------------
  1 | --
  2 | -- Each entry of "syntax" describes a node of the AST.
  3 | -- The "properties" field gives the specification for the properties
  4 | -- of each node.
  5 | --
  6 | -- Each "properties" entry is of the form:
  7 | --
  8 | -- <name> = <ast_element_type>
  9 | --
 10 | -- where <ast_element_type> is a recursive type defined as follows.
 11 | -- It can be:
 12 | --
 13 | -- "Expression",
 14 | -- "Statement",
 15 | -- ...
 16 | -- to indicate a specific kind of "node". Alternatively, a node can be
 17 | -- specified as:
 18 | --
 19 | -- { type = "node", kind = "Statement" }
 20 | --
 21 | -- In addition an <ast_element_type> can be also:
 22 | --
 23 | -- { type = "literal", value = "string" }
 24 | --
 25 | -- { type = "enum", values = {"a", "b", "c"} }
 26 | --
 27 | -- { type = "list", kind = <ast_element_type> }
 28 | --
 29 | -- { type = "choice", values = {<ast_element_type>, <ast_element_type>, ...} }
 30 | --
 31 | -- The latter two are defined recursively. A "list" is a Lua table of elements of
 32 | -- a given type. A "choice" allows an element to be either of one type or another.
 33 | --
 34 | 
 35 | local syntax = {
 36 |    Node = {
 37 |       kind = "Node",
 38 |       abstract = true
 39 |    },
 40 |    Expression = {
 41 |       kind = "Expression",
 42 |       base = "Node",
 43 |       abstract = true,
 44 |    },
 45 |    Statement = {
 46 |       kind = "Statement",
 47 |       base = "Node",
 48 |       abstract = true,
 49 |    },
 50 |    Chunk = {
 51 |       kind = "Chunk",
 52 |       base = "Node",
 53 |       properties = {
 54 |          body = {
 55 |             type = "list",
 56 |             kind = "Statement"
 57 |          },
 58 |          chunkname = { type = "literal", value = "string" },
 59 |       }
 60 |    },
 61 |    Identifier = {
 62 |       kind = "Identifier",
 63 |       base = "Expression",
 64 |       properties = {
 65 |          name = { type = "literal", value = "string" },
 66 |       }
 67 |    },
 68 |    Vararg = {
 69 |       kind = "Vararg",
 70 |       base = "Identifier",
 71 |       properties = { }
 72 |    },
 73 |    BinaryExpression = {
 74 |       kind = "BinaryExpression",
 75 |       base = "Expression",
 76 |       properties = {
 77 |          operator = {
 78 |             type   = "enum",
 79 |             values = {
 80 |                "+", "-", "*", "/", "^", "%",
 81 |                "==", "~=", ">=", ">", "<=", "<",
 82 |             }
 83 |          },
 84 |          left = "Expression",
 85 |          right = "Expression",
 86 |       }
 87 |    },
 88 |    BinaryAlgebraExpression = {
 89 |       kind = "BinaryAlgebraExpression",
 90 |       base = "BinaryExpression",
 91 |       properties = {
 92 |          operator = {
 93 |             type   = "enum",
 94 |             values = {
 95 |                "+", "-", "*", "/", "^", "%",
 96 |                "==", "~=", ">=", ">", "<=", "<",
 97 |                "**", "^^",
 98 |             }
 99 |          },
100 |          left = "Expression",
101 |          right = "Expression",
102 |       }
103 |    },
104 |    ConcatenateExpression = {
105 |       kind = "ConcatenateExpression",
106 |       base = "Expression",
107 |       properties = {
108 |          terms = {
109 |             type = "list",
110 |             kind = "Expression",
111 |          }
112 |       }
113 |    },
114 |    UnaryExpression = {
115 |       kind = "UnaryExpression",
116 |       base = "Expression",
117 |       properties = {
118 |          operator = {
119 |             type   = "enum",
120 |             values = { "not", "-", "#" },
121 |          },
122 |          argument = "Expression",
123 |       }
124 |    },
125 |    UnaryAlgebraExpression = {
126 |       kind = "UnaryAlgebraExpression",
127 |       base = "UnaryExpression",
128 |       properties = {
129 |          operator = {
130 |             type   = "enum",
131 |             values = { "not", "-", "#", "`" },
132 |          },
133 |          argument = "Expression",
134 |       }
135 |    },
136 |    ExpressionValue = {
137 |       kind = "ExpressionValue",
138 |       base = "Expression",
139 |       properties = {
140 |          value = "Expression",
141 |       }
142 |    },
143 |    AssignmentExpression = {
144 |       kind = "AssignmentExpression",
145 |       base = "Statement",
146 |       properties = {
147 |          left = {
148 |             type = "list",
149 |             kind = { type = "choice", values = { "MemberExpression", "Identifier" } },
150 |          },
151 |          right = {
152 |             type = "list",
153 |             kind = "Expression",
154 |          }
155 |       }
156 |    },
157 |    AssignmentAlgebraExpression = {
158 |       kind = "AssignmentAlgebraExpression",
159 |       base = "AssignmentExpression",
160 |       properties = {
161 |          left = {
162 |             type = "list",
163 |             kind = { type = "choice", values = { "MemberExpression", "Identifier" } },
164 |          },
165 |          right = {
166 |             type = "list",
167 |             kind = "Expression",
168 |          }
169 |       }
170 |    },
171 |    LogicalExpression = {
172 |       kind = "LogicalExpression",
173 |       base = "Expression",
174 |       properties = {
175 |          operator = {
176 |             type = "enum",
177 |             values = { "and", "or" }
178 |          },
179 |          left  = "Expression",
180 |          right = "Expression",
181 |       }
182 |    },
183 |    MemberExpression = {
184 |       kind = "MemberExpression",
185 |       base = "Expression",
186 |       properties = {
187 |          object = "Expression",
188 |          property = "Expression",
189 |          computed = {
190 |             type = "literal",
191 |             value = "boolean",
192 |             default = false
193 |          },
194 |       }
195 |    },
196 |    IndexAlgebraExpression = {
197 |       kind = "IndexAlgebraExpression",
198 |       base = "MemberExpression",
199 |       properties = {
200 |          object = "Expression",
201 |       }
202 |    },
203 |    CallExpression = {
204 |       kind = "CallExpression",
205 |       base = "Expression",
206 |       properties = {
207 |          callee = "Expression",
208 |          arguments = { type = "list", kind = "Expression" },
209 |       }
210 |    },
211 |    SendExpression = {
212 |       kind = "SendExpression",
213 |       base = "Expression",
214 |       properties = {
215 |          receiver = "Expression",
216 |          method = "Identifier",
217 |          arguments = {
218 |             type = "list",
219 |             kind = "Expression"
220 |          }
221 |       }
222 |     },
223 |     Literal = {
224 |       kind = "Literal",
225 |       base = "Expression",
226 |       properties = {
227 |          value = {
228 |             type = "choice",
229 |             values = {
230 |                { type = "literal", value = "string" },
231 |                { type = "literal", value = "number" },
232 |                { type = "literal", value = "nil" },
233 |                { type = "literal", value = "boolean" },
234 |                { type = "literal", value = "cdata" },
235 |             }
236 |          },
237 |       }
238 |    },
239 |    Table = {
240 |       kind = "Table",
241 |       base = "Expression",
242 |       properties = {
243 |          array_entries = {
244 |             type = "list",
245 |             kind = "Expression",
246 |          },
247 |          hash_keys = {
248 |             type = "list",
249 |             kind = "Expression",
250 |          },
251 |          hash_values = {
252 |             type = "list",
253 |             kind = "Expression",
254 |          },
255 |       }
256 |    },
257 |    ExpressionStatement = {
258 |       kind = "ExpressionStatement",
259 |       base = "Statement",
260 |       properties = {
261 |          expression = {
262 |             type = "choice",
263 |             values = { "Statement", "Expression" },
264 |          }
265 |       }
266 |    },
267 |    EmptyStatement = {
268 |       kind = "EmptyStatement",
269 |       base = "Statement",
270 |       properties = { },
271 |    },
272 |    DoStatement = {
273 |       kind = "DoStatement",
274 |       base = "Statement",
275 |       properties = {
276 |          body = {
277 |             type = "list",
278 |             kind = "Statement",
279 |          }
280 |       }
281 |    },
282 |    IfStatement = {
283 |       kind = "IfStatement",
284 |       base = "Statement",
285 |       properties = {
286 |          tests = {
287 |             type = "list",
288 |             kind = "Expression",
289 |          },
290 |          cons = {
291 |             type = "list",
292 |             kind = { type = "list", kind = "Statement" },
293 |          },
294 |          alternate = {
295 |             type = "list",
296 |             kind = "Statement",
297 |             optional = true,
298 |          }
299 |       }
300 |    },
301 |    LabelStatement = {
302 |       kind = "LabelStatement",
303 |       base = "Statement",
304 |       properties = {
305 |          label = { type = "literal", value = "string" },
306 |       }
307 |    },
308 |    GotoStatement = {
309 |       kind = "GotoStatement",
310 |       base = "Statement",
311 |       properties = {
312 |          label = { type = "literal", value = "string" }
313 |       }
314 |    },
315 |    BreakStatement = {
316 |       kind = "BreakStatement",
317 |       base = "Statement",
318 |       properties = { },
319 |    },
320 |    ReturnStatement = {
321 |       kind = "ReturnStatement",
322 |       base = "Statement",
323 |       properties = {
324 |          arguments = {
325 |             type = "list",
326 |             kind = "Expression"
327 |          }
328 |       }
329 |    },
330 |    WhileStatement = {
331 |       kind = "WhileStatement",
332 |       base = "Statement",
333 |       properties = {
334 |          test = "Expression",
335 |          body = {
336 |             type = "list",
337 |             kind = "Statement"
338 |          }
339 |       }
340 |    },
341 |    RepeatStatement = {
342 |       kind = "RepeatStatement",
343 |       base = "Statement",
344 |       properties = {
345 |          test = "Expression",
346 |          body = {
347 |             type = "list",
348 |             kind = "Statement",
349 |          },
350 |       }
351 |    },
352 |    ForInit = {
353 |       kind = "ForInit",
354 |       base = "Expression",
355 |       properties = {
356 |          id = "Identifier",
357 |          value = "Expression",
358 |       }
359 |    },
360 |    ForStatement = {
361 |       kind = "ForStatement",
362 |       base = "Statement",
363 |       properties = {
364 |          init = "ForInit",
365 |          last = "Expression",
366 |          step = {
367 |             type = "node",
368 |             kind = "Expression",
369 |             optional = true,
370 |          },
371 |          body = {
372 |             type = "list",
373 |             kind = "Statement",
374 |          },
375 |       }
376 |    },
377 |    ForNames = {
378 |       kind = "ForNames",
379 |       base = "Expression",
380 |       properties = {
381 |          names = {
382 |             type = "list",
383 |             kind = "Identifier",
384 |          }
385 |       }
386 |    },
387 |    ForInStatement = {
388 |       kind = "ForInStatement",
389 |       base = "Statement",
390 |       properties = {
391 |          namelist = "ForNames",
392 |          explist = {
393 |             type = "list",
394 |             kind = "Expression"
395 |          },
396 |          body = {
397 |             type = "list",
398 |             kind = "Statement",
399 |          },
400 |       }
401 |    },
402 |    LocalDeclaration = {
403 |       kind = "LocalDeclaration",
404 |       base = "Statement",
405 |       properties = {
406 |          names = {
407 |             type = "list",
408 |             kind = "Identifier"
409 |          },
410 |          expressions = {
411 |             type = "list",
412 |             kind = "Expression"
413 |          }
414 |       }
415 |    },
416 |    FunctionDeclaration = {
417 |       kind = "FunctionDeclaration",
418 |       base = "Statement",
419 |       properties = {
420 |          id = {
421 |             type = "choice",
422 |             values = { "MemberExpression", "Identifier" },
423 |          },
424 |          body = {
425 |             type = "list",
426 |             kind = "Statement",
427 |          },
428 |          params = {
429 |             type = "list",
430 |             kind = "Identifier",
431 |          },
432 |          vararg = {
433 |             type = "literal",
434 |             value = "boolean",
435 |             default = false
436 |          },
437 |          locald = {
438 |             type = "literal",
439 |             value = "boolean",
440 |             default = false
441 |          }
442 |       }
443 |    },
444 |    FunctionExpression = {
445 |       kind = "FunctionExpression",
446 |       base = "Expression",
447 |       properties = {
448 |          body = {
449 |             type = "list",
450 |             kind = "Statement",
451 |          },
452 |          params = {
453 |             type = "list",
454 |             kind = "Identifier",
455 |          },
456 |          vararg = {
457 |             type = "literal",
458 |             value = "boolean",
459 |             default = false
460 |          }
461 |       }
462 |    }
463 | }
464 | 
465 | local check
466 | 
--- Tell whether `prop` is a node whose kind is `tag` or derives from it.
-- The chain of `base` links in `syntax` is followed, starting at `prop.kind`.
-- @param prop  candidate value (anything)
-- @param tag   kind name to look for
-- @return true or false
local function iskind(prop, tag)
   if type(prop) ~= "table" then
      return false
   end
   local name = prop.kind
   while true do
      local meta = syntax[name]
      if not meta then
         -- Unknown kind, or walked past the root of the hierarchy.
         return false
      end
      if meta.kind == tag then
         return true
      end
      name = meta.base
   end
end
480 | 
--- Tell whether `prop` is an AST node of any kind (i.e. derives from "Node").
local function isnode(prop)
   local derives_from_node = iskind(prop, "Node")
   return derives_from_node
end
484 | 
--- Render an <ast_element_type> specification as text for error messages.
-- @param spec  a kind name (string) or a spec table with a `type` field
-- @return the textual description
-- Raises "internal error: invalid spec type" for an unrecognised `spec.type`.
local function kind2str(spec)
   if type(spec) == "string" then
      return spec
   end
   local spec_type = spec.type
   if spec_type == "node" then
      return spec.kind
   end
   if spec_type == "list" then
      return "list of " .. kind2str(spec.kind)
   end
   if spec_type == "enum" then
      return table.concat(spec.values, ", ")
   end
   if spec_type == "literal" then
      return "literal " .. spec.value
   end
   if spec_type == "choice" then
      local parts = {}
      for idx = 1, #spec.values do
         parts[idx] = kind2str(spec.values[idx])
      end
      return table.concat(parts, "|")
   end
   error("internal error: invalid spec type")
end
507 | 
--- Check that `prop` is a node of kind `tag` (or of a kind derived from it).
-- @param tag   expected kind name
-- @param prop  value to check
-- @return true on success; false plus an error message otherwise
local function check_node(tag, prop)
   if isnode(prop) then
      if iskind(prop, tag) then
         return true
      end
      return false, "expected " .. tag
   end
   return false, "expected Node"
end
517 | 
--- Check that `prop` is a plain Lua list whose elements all satisfy
-- `spec.kind`.
-- @param spec  list spec; `spec.kind` is the element type
-- @param prop  value to check
-- @return true on success; false plus an error message otherwise
local function check_list(spec, prop)
   if type(prop) ~= "table" then
      return false, "expected list of "..kind2str(spec.kind).." (got "..type(prop)..")"
   end
   if isnode(prop) then
      return false, "expected list of "..kind2str(spec.kind).." (got node)"
   end
   for i = 1, #prop do
      local item = prop[i]
      local ok, err = check(spec.kind, item)
      if not ok then
         -- Describe the offending element without assuming it is a node:
         -- it may be a number, a string, or a table with no `kind` (such as a
         -- nested list). Indexing `.kind` blindly would raise and hide `err`.
         local got = type(item) == "table" and item.kind or type(item)
         return false, err.." (got "..tostring(got)..")"
      end
   end
   return true
end
533 | 
--- Check that `prop` equals one of the values listed in `spec.values`.
-- @param spec  enum spec
-- @param prop  value to check
-- @return true on success; false plus an error message otherwise
local function check_enum(spec, prop)
   local allowed = spec.values
   for idx = 1, #allowed do
      if allowed[idx] == prop then
         return true
      end
   end
   local message = "expected one of "..kind2str(spec).." (got '"..tostring(prop).."')"
   return false, message
end
540 | 
--- Check that the Lua type of `prop` is the one named by `spec.value`.
-- @param spec  literal spec; `spec.value` must be a string (asserted)
-- @param prop  value to check
-- @return true on success; false plus an error message otherwise
local function check_literal(spec, prop)
   local expected = spec.value
   assert(type(expected) == "string")
   local actual = type(prop)
   if actual == expected then
      return true
   end
   return false, "expected "..expected.." (got "..actual..")"
end
548 | 
--- Check that `prop` satisfies at least one of the alternatives in
-- `spec.values`; alternatives are tried in order.
-- @param spec  choice spec
-- @param prop  value to check
-- @return true on success; false plus an error message otherwise
local function check_choice(spec, prop)
   local alternatives = spec.values
   for idx = 1, #alternatives do
      local matched = check(alternatives[idx], prop)
      if matched then
         return true
      end
   end
   return false, "expected one of "..kind2str(spec).." (got '"..tostring(prop).."')"
end
557 | 
--- Check `prop` against an <ast_element_type> specification by dispatching to
-- the checker matching the spec.
-- @param spec  a kind name (string) or a spec table with a `type` field
-- @param prop  value to check
-- @return the result of the selected checker
-- Raises "internal error: invalid spec type" for an unrecognised `spec.type`.
function check(spec, prop)
   -- A bare string is shorthand for a node of that kind.
   if type(spec) == "string" then
      return check_node(spec, prop)
   end
   local spec_type = spec.type
   if spec_type == "node" then
      return check_node(spec.kind, prop)
   end
   if spec_type == "list" then
      return check_list(spec, prop)
   end
   if spec_type == "enum" then
      return check_enum(spec, prop)
   end
   if spec_type == "literal" then
      return check_literal(spec, prop)
   end
   if spec_type == "choice" then
      return check_choice(spec, prop)
   end
   error("internal error: invalid spec type")
end
575 | 
--- Validate `node` against the property specifications in `meta`, filling in
-- declared defaults for missing properties.
-- @param meta  entry of `syntax` describing the node kind (nil raises)
-- @param node  the node table; modified in place when defaults apply
-- @return `node`
-- Raises an error naming the node kind and property on the first mismatch.
local function validate(meta, node)
   if meta == nil then
      error("unknown node kind: "..node.kind)
   end
   for name, spec in pairs(meta.properties) do
      local prop = node[name]
      -- A missing property takes the spec's default, when one is declared.
      if prop == nil and spec.default ~= nil then
         prop = spec.default
         node[name] = prop
      end
      -- Only an absent optional property escapes the check.
      if not (prop == nil and spec.optional) then
         local ok, er = check(spec, prop)
         if not ok then
            error(er.." for "..(node.kind or "?").."."..name)
         end
      end
   end
   return node
end
594 | 
--- Turn `props` into a node of the given kind: tag it with `kind` and
-- validate it against the matching `syntax` entry.
-- @param kind   node kind name
-- @param props  table of node properties; modified in place and returned
-- @return the result of `validate` (the node itself)
local function build(kind, props)
   props.kind = kind
   return validate(syntax[kind], props)
end
600 | 
601 | return {
602 |    syntax = syntax,
603 |    build  = build,
604 | }
605 | 


--------------------------------------------------------------------------------
/bcread.lua:
--------------------------------------------------------------------------------
  1 | --
  2 | -- LuaJIT Language Toolkit.
  3 | --
  4 | -- Copyright (C) 2013-2014 Francesco Abbate. All rights reserved.
  5 | --
  6 | -- Major portions taken verbatim or adapted from
  7 | -- LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/
  8 | -- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
  9 | --
 10 | -- See Copyright Notice in LICENSE
 11 | --
 12 | 
 13 | local ffi = require("ffi")
 14 | 
 15 | local band, bor, shl, shr, bnot = bit.band, bit.bor, bit.lshift, bit.rshift, bit.bnot
 16 | local strsub, strbyte, strchar, format, gsub = string.sub, string.byte, string.char, string.format, string.gsub
 17 | 
 18 | local BCDUMP = {
 19 |     HEAD1 = 0x1b,
 20 |     HEAD2 = 0x4c,
 21 |     HEAD3 = 0x4a,
 22 | 
 23 |     -- If you perform *any* kind of private modifications to the bytecode itself
 24 |     -- or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher.
 25 |     VERSION = 1,
 26 | 
 27 |     -- Compatibility flags.
 28 |     F_BE    = 0x01,
 29 |     F_STRIP = 0x02,
 30 |     F_FFI   = 0x04,
 31 | }
 32 | 
 33 | BCDUMP.F_KNOWN = BCDUMP.F_FFI*2-1
 34 | 
 35 | local BCDUMP_KGC_CHILD, BCDUMP_KGC_TAB, BCDUMP_KGC_I64, BCDUMP_KGC_U64, BCDUMP_KGC_COMPLEX, BCDUMP_KGC_STR = 0, 1, 2, 3, 4, 5
 36 | local BCDUMP_KTAB_NIL, BCDUMP_KTAB_FALSE, BCDUMP_KTAB_TRUE, BCDUMP_KTAB_INT, BCDUMP_KTAB_NUM, BCDUMP_KTAB_STR = 0, 1, 2, 3, 4, 5
 37 | 
 38 | local BCM_REF = {
 39 |     'none', 'dst', 'base', 'var', 'rbase', 'uv',  -- Mode A must be <= 7
 40 |     'lit', 'lits', 'pri', 'num', 'str', 'tab', 'func', 'jump', 'cdata'
 41 | }
 42 | 
-- Opcode definitions, one row per opcode:
--   { name, mode of operand A, mode of operand B, mode of operand C/D, metamethod }
-- The opcode number is the row position minus one (see BCDEF_EVAL), so the
-- order of the rows is significant. BCDEF_EVAL reads only the first four
-- fields of each row.
local BCDEF_TAB = {
    {'ISLT', 'var', 'none', 'var', 'lt'},
    {'ISGE', 'var', 'none', 'var', 'lt'},
    {'ISLE', 'var', 'none', 'var', 'le'},
    {'ISGT', 'var', 'none', 'var', 'le'},

    {'ISEQV', 'var', 'none', 'var', 'eq'},
    {'ISNEV', 'var', 'none', 'var', 'eq'},
    {'ISEQS', 'var', 'none', 'str', 'eq'},
    {'ISNES', 'var', 'none', 'str', 'eq'},
    {'ISEQN', 'var', 'none', 'num', 'eq'},
    {'ISNEN', 'var', 'none', 'num', 'eq'},
    {'ISEQP', 'var', 'none', 'pri', 'eq'},
    {'ISNEP', 'var', 'none', 'pri', 'eq'},

    -- Unary test and copy ops.
    {'ISTC', 'dst', 'none', 'var', 'none'},
    {'ISFC', 'dst', 'none', 'var', 'none'},
    {'IST', 'none', 'none', 'var', 'none'},
    {'ISF', 'none', 'none', 'var', 'none'},

    -- Unary ops.
    {'MOV', 'dst', 'none', 'var', 'none'},
    {'NOT', 'dst', 'none', 'var', 'none'},
    {'UNM', 'dst', 'none', 'var', 'unm'},
    {'LEN', 'dst', 'none', 'var', 'len'},

    -- Binary ops. ORDER OPR. VV last, POW must be next.
    {'ADDVN', 'dst', 'var', 'num', 'add'},
    {'SUBVN', 'dst', 'var', 'num', 'sub'},
    {'MULVN', 'dst', 'var', 'num', 'mul'},
    {'DIVVN', 'dst', 'var', 'num', 'div'},
    {'MODVN', 'dst', 'var', 'num', 'mod'},

    {'ADDNV', 'dst', 'var', 'num', 'add'},
    {'SUBNV', 'dst', 'var', 'num', 'sub'},
    {'MULNV', 'dst', 'var', 'num', 'mul'},
    {'DIVNV', 'dst', 'var', 'num', 'div'},
    {'MODNV', 'dst', 'var', 'num', 'mod'},

    {'ADDVV', 'dst', 'var', 'var', 'add'},
    {'SUBVV', 'dst', 'var', 'var', 'sub'},
    {'MULVV', 'dst', 'var', 'var', 'mul'},
    {'DIVVV', 'dst', 'var', 'var', 'div'},
    {'MODVV', 'dst', 'var', 'var', 'mod'},

    {'POW', 'dst', 'var', 'var', 'pow'},
    {'CAT', 'dst', 'rbase', 'rbase', 'concat'},

    -- Constant ops.
    {'KSTR', 'dst', 'none', 'str', 'none'},
    {'KCDATA', 'dst', 'none', 'cdata', 'none'},
    {'KSHORT', 'dst', 'none', 'lits', 'none'},
    {'KNUM', 'dst', 'none', 'num', 'none'},
    {'KPRI', 'dst', 'none', 'pri', 'none'},
    {'KNIL', 'base', 'none', 'base', 'none'},

    -- Upvalue and function ops.
    {'UGET', 'dst', 'none', 'uv', 'none'},
    {'USETV', 'uv', 'none', 'var', 'none'},
    {'USETS', 'uv', 'none', 'str', 'none'},
    {'USETN', 'uv', 'none', 'num', 'none'},
    {'USETP', 'uv', 'none', 'pri', 'none'},
    {'UCLO', 'rbase', 'none', 'jump', 'none'},
    {'FNEW', 'dst', 'none', 'func', 'gc'},

    -- Table ops.
    {'TNEW', 'dst', 'none', 'lit', 'gc'},
    {'TDUP', 'dst', 'none', 'tab', 'gc'},
    {'GGET', 'dst', 'none', 'str', 'index'},
    {'GSET', 'var', 'none', 'str', 'newindex'},
    {'TGETV', 'dst', 'var', 'var', 'index'},
    {'TGETS', 'dst', 'var', 'str', 'index'},
    {'TGETB', 'dst', 'var', 'lit', 'index'},
    {'TSETV', 'var', 'var', 'var', 'newindex'},
    {'TSETS', 'var', 'var', 'str', 'newindex'},
    {'TSETB', 'var', 'var', 'lit', 'newindex'},
    {'TSETM', 'base', 'none', 'num', 'newindex'},

    -- Calls and vararg handling. T = tail call.
    {'CALLM', 'base', 'lit', 'lit', 'call'},
    {'CALL', 'base', 'lit', 'lit', 'call'},
    {'CALLMT', 'base', 'none', 'lit', 'call'},
    {'CALLT', 'base', 'none', 'lit', 'call'},
    {'ITERC', 'base', 'lit', 'lit', 'call'},
    {'ITERN', 'base', 'lit', 'lit', 'call'},
    {'VARG', 'base', 'lit', 'lit', 'none'},
    {'ISNEXT', 'base', 'none', 'jump', 'none'},

    -- Returns.
    {'RETM', 'base', 'none', 'lit', 'none'},
    {'RET', 'rbase', 'none', 'lit', 'none'},
    {'RET0', 'rbase', 'none', 'lit', 'none'},
    {'RET1', 'rbase', 'none', 'lit', 'none'},

    -- Loops and branches. I/J = interp/JIT, I/C/L = init/call/loop.
    {'FORI', 'base', 'none', 'jump', 'none'},
    {'JFORI', 'base', 'none', 'jump', 'none'},

    {'FORL', 'base', 'none', 'jump', 'none'},
    {'IFORL', 'base', 'none', 'jump', 'none'},
    {'JFORL', 'base', 'none', 'lit', 'none'},

    {'ITERL', 'base', 'none', 'jump', 'none'},
    {'IITERL', 'base', 'none', 'jump', 'none'},
    {'JITERL', 'base', 'none', 'lit', 'none'},

    {'LOOP', 'rbase', 'none', 'jump', 'none'},
    {'ILOOP', 'rbase', 'none', 'jump', 'none'},
    {'JLOOP', 'rbase', 'none', 'lit', 'none'},

    {'JMP', 'rbase', 'none', 'jump', 'none'},

    -- Function headers. I/J = interp/JIT, F/V/C = fixarg/vararg/C func.
    {'FUNCF', 'rbase', 'none', 'none', 'none'},
    {'IFUNCF', 'rbase', 'none', 'none', 'none'},
    {'JFUNCF', 'rbase', 'none', 'lit', 'none'},
    {'FUNCV', 'rbase', 'none', 'none', 'none'},
    {'IFUNCV', 'rbase', 'none', 'none', 'none'},
    {'JFUNCV', 'rbase', 'none', 'lit', 'none'},
    {'FUNCC', 'rbase', 'none', 'none', 'none'},
    {'FUNCCW', 'rbase',  'none', 'none', 'none'},
}
166 | 
-- BC maps opcode number -> opcode name, BCMODE maps opcode number -> packed
-- operand modes. Both are filled in by BCDEF_EVAL.
local BC, BCMODE = {}, {}

-- Translate an operand mode name into its numeric code (list position minus
-- one). Returns nothing when the name is not listed in BCM_REF.
local function BCM(name)
    for position, mode_name in ipairs(BCM_REF) do
        if mode_name == name then
            return position - 1
        end
    end
end
174 | 
-- Populate BC and BCMODE from BCDEF_TAB. The packed mode word keeps mode A in
-- bits 0-2, mode B from bit 3 and mode C from bit 7.
local function BCDEF_EVAL()
    for row, def in ipairs(BCDEF_TAB) do
        local opcode = row - 1
        local mode_a = BCM(def[2])
        local mode_b = BCM(def[3])
        local mode_c = BCM(def[4])
        BC[opcode] = def[1]
        BCMODE[opcode] = bor(mode_a, shl(mode_b, 3), shl(mode_c, 7))
    end
end

BCDEF_EVAL()
185 | 
-- Prototype flag bits, keyed by name.
local PROTO_REF = {
    PROTO_CHILD  = 0x01,    -- Has child prototypes.
    PROTO_VARARG = 0x02,    -- Vararg function.
    PROTO_FFI    = 0x04,    -- Uses BC_KCDATA for FFI datatypes.
    PROTO_NOJIT  = 0x08,    -- JIT disabled for this function.
    PROTO_ILOOP  = 0x10,    -- Patched bytecode with ILOOP etc.
    -- Only used during parsing.
    PROTO_HAS_RETURN   = 0x20,    -- Already emitted a return.
    PROTO_FIXUP_RETURN = 0x40,    -- Need to fixup emitted returns.
}

-- Render a prototype flags byte as "NAME|NAME|..." or "None" when no known
-- bit is set.
-- The names are sorted by ascending bit value: pairs() iterates in an
-- unspecified order, which made the rendered string vary between runs.
local function proto_flags_string(flags)
    local names = {}
    for name, mask in pairs(PROTO_REF) do
        if band(flags, mask) ~= 0 then names[#names+1] = name end
    end
    table.sort(names, function(a, b) return PROTO_REF[a] < PROTO_REF[b] end)
    return #names > 0 and table.concat(names, "|") or "None"
end
204 | 
-- Format the n-th group of (up to) eight bytes as space-separated two-digit
-- hex. Returns the number of bytes formatted and the resulting string.
local function bytes_row(bytes, n)
    local first = (n - 1) * 8 + 1
    local cells = {}
    local i = first
    while i < first + 8 and bytes[i] do
        cells[#cells + 1] = format("%02x", bytes[i])
        i = i + 1
    end
    return #cells, table.concat(cells, " ")
end
215 | 
-- Return the length and content of the n-th 46-character slice of text.
local function text_fragment(text, n)
    local last = n * 46
    local piece = strsub(text, last - 45, last)
    return #piece, piece
end
221 | 
-- Write the bytes collected in ls.bytes next to the formatted message, eight
-- bytes and 46 characters per output row, emitting rows until both are
-- exhausted (at least one row is always written). Clears ls.bytes afterwards.
local function log(out, ls, fmt, ...)
    local text = format(fmt, ...)
    local bytes_done, chars_done = 0, 0
    local row = 0
    repeat
        row = row + 1
        local nbytes, hex = bytes_row(ls.bytes, row)
        local nchars, piece = text_fragment(text, row)
        out:write(format("%-24s| %s\n", hex, piece))
        bytes_done = bytes_done + nbytes
        chars_done = chars_done + nchars
    until bytes_done >= #ls.bytes and chars_done >= #text
    ls.bytes = {}
end
235 | 
-- Drop a leading "@" and everything up to the last "/" or "\" from a chunk
-- name. The outer parentheses keep the result to a single value.
local function chunkname_strip(s)
    local without_at = gsub(s, "^@", "")
    return (gsub(without_at, ".+[/\\]", ""))
end
241 | 
-- Create an empty prototype record for the given file name: six empty
-- per-section tables plus zeroed line information.
local function proto_new(filename)
    local proto = { filename = filename, firstline = 0, numlines = 0 }
    local sections = { "kgc", "knum", "uv", "lineinfo", "uvinfo", "varinfo" }
    for i = 1, #sections do
        proto[sections[i]] = {}
    end
    return proto
end
255 | 
-- Invoke obj[method_name](obj, ...) when the object provides that method;
-- otherwise do nothing. Never returns a value.
local function action(obj, method_name, ...)
    local handler = obj[method_name]
    if not handler then return end
    handler(obj, ...)
end
260 | 
-- Capture the current read position and remaining byte count of the reader
-- state, and reset its collected-bytes buffer.
local function save_position(ls)
    local snapshot = { p = ls.p, n = ls.n }
    ls.bytes = {}
    return snapshot
end
265 | 
-- Rewind the reader state to a snapshot taken by save_position and reset its
-- collected-bytes buffer.
local function restore_position(ls, save)
    ls.p = save.p
    ls.n = save.n
    ls.bytes = {}
end
270 | 
-- Return the byte value stored at position p of ls.data (default: the current
-- read position) without consuming it.
local function byte(ls, p)
    local pos = p
    if not pos then pos = ls.p end
    return strbyte(ls.data, pos, pos)
end
275 | 
-- Raise "incomplete bytecode data" unless at least len bytes remain unread.
local function bcread_need(ls, len)
    local short = ls.n < len
    if short then
        error("incomplete bytecode data")
    end
end
281 | 
-- Record the next len bytes in ls.bytes and decrease the remaining count.
-- The read position ls.p is left untouched; callers advance it themselves.
local function bcread_consume(ls, len)
    assert(ls.n >= len, "incomplete bytecode data")
    local pos = ls.p
    for _ = 1, len do
        ls.bytes[#ls.bytes + 1] = byte(ls, pos)
        pos = pos + 1
    end
    ls.n = ls.n - len
end
289 | 
-- Fetch the byte at the current position, record it in ls.bytes and decrease
-- the remaining count. Does not advance ls.p (bcread_byte does that).
local function bcread_dec(ls)
    assert(ls.n > 0, "incomplete bytecode data")
    local value = byte(ls)
    local seen = ls.bytes
    seen[#seen + 1] = value
    ls.n = ls.n - 1
    return value
end
297 | 
-- Read one byte and move the read position past it.
local function bcread_byte(ls)
    local value = bcread_dec(ls)
    ls.p = 1 + ls.p
    return value
end
303 | 
-- Read two bytes and combine them with the first byte as the low byte.
local function bcread_uint16(ls)
    local pos = ls.p
    local lo, hi = strbyte(ls.data, pos, pos + 1)
    bcread_consume(ls, 2)
    ls.p = pos + 2
    return bor(lo, shl(hi, 8))
end
310 | 
-- Read four bytes and combine them with the first byte as the lowest byte.
-- The result comes from the bit library, so it is a signed 32-bit number.
local function bcread_uint32(ls)
    local pos = ls.p
    local b0, b1, b2, b3 = strbyte(ls.data, pos, pos + 3)
    bcread_consume(ls, 4)
    ls.p = pos + 4
    return bor(b0, shl(b1, 8), shl(b2, 16), shl(b3, 24))
end
317 | 
-- Read a non-empty, NUL-terminated string starting at the current position.
-- Returns the string without its terminator and advances past the terminator.
-- Raises "corrupted bytecode" when the string is empty or when no terminator
-- is found in the unread data.
local function bcread_string(ls)
    local start = ls.p
    -- First position past the unread data. The scan must stop here: beyond
    -- the end byte() yields nil, which never equals 0, so an unterminated
    -- string would otherwise be scanned forever.
    local limit = start + ls.n
    local p = start
    while p < limit and byte(ls, p) ~= 0 do
        p = p + 1
    end
    assert(p < limit and byte(ls, p) == 0 and p > start, "corrupted bytecode")
    local s = strsub(ls.data, start, p - 1)
    local len = p - start + 1   -- string bytes plus the terminator
    bcread_consume(ls, len)
    ls.p = p + 1
    return s
end
330 | 
-- Read a ULEB128 value: seven payload bits per byte, least significant group
-- first, with bit 7 set on every byte except the last.
local function bcread_uleb128(ls)
    local value = bcread_byte(ls)
    if value < 0x80 then
        return value
    end
    value = band(value, 0x7f)
    local shift = 7
    local chunk
    repeat
        chunk = bcread_byte(ls)
        value = bor(value, shl(band(chunk, 0x7f), shift))
        shift = shift + 7
    until chunk < 0x80
    return value
end
344 | 
-- Read the top 32 bits of a 33 bit ULEB128 value from the buffer. The lowest
-- bit of the first byte is dropped here; bcread_knum inspects it separately.
local function bcread_uleb128_33(ls)
    local value = shr(bcread_byte(ls), 1)
    if value < 0x40 then
        return value
    end
    value = band(value, 0x3f)
    local shift = 6
    local chunk
    repeat
        chunk = bcread_byte(ls)
        value = bor(value, shl(band(chunk, 0x7f), shift))
        shift = shift + 7
    until chunk < 0x80
    return value
end
359 | 
-- Read len raw bytes and return them as a string.
local function bcread_mem(ls, len)
    local first = ls.p
    local chunk = strsub(ls.data, first, first + len - 1)
    bcread_consume(ls, len)
    ls.p = first + len
    return chunk
end

-- Alias of bcread_mem.
local bcread_block = bcread_mem
368 | 
369 | 
-- Replacement callback for control characters in string constants: newline,
-- carriage return and tab become "\n", "\r", "\t"; any other character becomes
-- a backslash followed by its three-digit decimal code.
local function ctlsub(c)
    if c == "\n" then return "\\n"
    elseif c == "\r" then return "\\r"
    elseif c == "\t" then return "\\t"
    -- strbyte, not the file-local byte(ls, p) reader, which expects a reader
    -- state table and fails when handed a string.
    else return format("\\%03d", strbyte(c))
    end
end
377 | 
-- Read one 32-bit instruction word. Returns the word and the packed operand
-- modes of its opcode (the low byte of the word).
local function bcread_ins(ls)
    local word = bcread_uint32(ls)
    local opcode = band(word, 0xff)
    return word, BCMODE[opcode]
end
383 | 
-- Return one bytecode line.
--   proto  : prototype record providing kgc, knum and uvinfo
--   pc     : instruction index, printed as the first column
--   ins    : 32-bit instruction word
--   m      : packed operand modes of the opcode (see BCDEF_EVAL)
--   prefix : optional two-character marker printed after pc
-- The line shows pc, marker, opcode name, operand A and then the B/C or D
-- operands, followed by "; <constant>" when an operand refers to a constant.
local function bcline(proto, pc, ins, m, prefix)
    local ma, mb, mc = band(m, 7), band(m, 15*8), band(m, 15*128)
    local a = band(shr(ins, 8), 0xff)
    local op = BC[band(ins, 0xff)]
    local s = format("%04d %s %-6s %3s ", pc, prefix or "  ", op, ma == 0 and "" or a)
    local d = shr(ins, 16)
    if mc == 13*128 then -- BCMjump
        return format("%s=> %04d", s, pc+d-0x7fff)
    end
    if mb ~= 0 then
        -- Three-operand form: C is the low byte of D, B is read below.
        d = band(d, 0xff)
    elseif mc == 0 then
        return s
    end
    local kc
    if mc == 10*128 then -- BCMstr
        local kgc = proto.kgc
        kc = kgc[#kgc - d]
        kc = format(#kc > 40 and '"%.40s"~' or '"%s"', gsub(kc, "%c", ctlsub))
    elseif mc == 9*128 then -- BCMnum
        kc = proto.knum[d+1]
        -- Opcode names in BC are not space padded, so compare against the
        -- bare name; the padded "TSETM " never matched and left the 2^52
        -- bias in the printed value.
        if op == "TSETM" then kc = kc - 2^52 end
    elseif mc == 12*128 then -- BCMfunc
        local f = proto.kgc[#proto.kgc - d]
        kc = format("%s:%d", f.filename, f.firstline)
    elseif mc == 5*128 then -- BCMuv
        kc = proto.uvinfo[d+1]
    end
    if ma == 5 then -- BCMuv
        local ka = proto.uvinfo[a+1]
        -- uvinfo is empty when the prototype carries no debug section; keep
        -- whatever constant text we already have instead of concatenating nil.
        if ka then
            if kc then kc = ka.." ; "..kc else kc = ka end
        end
    end
    if mb ~= 0 then
        local b = shr(ins, 24)
        if kc then return format("%s%3d %3d  ; %s", s, b, d, kc) end
        return format("%s%3d %3d", s, b, d)
    end
    if kc then return format("%s%3d      ; %s", s, d, kc) end
    if mc == 7*128 and d > 32767 then d = d - 65536 end -- BCMlits
    return format("%s%3d", s, d)
end
426 | 
-- Render the dump header flags as "NAME|NAME" (FFI first, then STRIP), or
-- "None" when neither bit is set.
local function flags_string(flags)
    local labels = {
        { BCDUMP.F_FFI, "BCDUMP_F_FFI" },
        { BCDUMP.F_STRIP, "BCDUMP_F_STRIP" },
    }
    local present = {}
    for i = 1, #labels do
        local mask, label = labels[i][1], labels[i][2]
        if band(flags, mask) ~= 0 then
            present[#present + 1] = label
        end
    end
    if #present == 0 then
        return "None"
    end
    return table.concat(present, "|")
end
433 | 
-- Read the sizebc - 1 instructions of a prototype, reporting each one to the
-- target through its optional "ins" method.
local function bcread_bytecode(ls, target, sizebc)
    action(target, "enter_bytecode", ls)
    local pc = 1
    while pc < sizebc do
        local word, modes = bcread_ins(ls)
        action(target, "ins", ls, pc, word, modes)
        pc = pc + 1
    end
end
441 | 
-- Decode a 16-bit upvalue descriptor.
-- Returns: index, is_local, is_immutable.
--   bit 0x8000 set   -> the upvalue refers to a local slot of the enclosing
--                       function; bit 0x4000 then marks it immutable and the
--                       low 14 bits hold the slot index.
--   bit 0x8000 clear -> the whole value is the index of an upvalue of the
--                       enclosing function.
local function uv_decode(uv)
    if band(uv, 0x8000) ~= 0 then
        -- The immutable flag is 0x4000; 0x40 is part of the slot index.
        local imm = (band(uv, 0x4000) ~= 0)
        return band(uv, 0x3fff), true, imm
    else
        return uv, false, false
    end
end
450 | 
-- Read sizeuv 16-bit upvalue descriptors, reporting each one to the target
-- through its optional "uv" method.
local function bcread_uv(ls, target, sizeuv)
    action(target, "enter_uv", ls)
    local index = 1
    while index <= sizeuv do
        local descriptor = bcread_uint16(ls)
        action(target, "uv", ls, index, descriptor)
        index = index + 1
    end
end
458 | 
-- FFI constructors used to rebuild numeric constants from their 32-bit halves.
-- The one-element array types give dword_new_u32 an addressable 8-byte cell.
local double_new = ffi.typeof('double[1]')
local uint32_new = ffi.typeof('uint32_t[1]')
local int64_new  = ffi.typeof('int64_t[1]')
local uint64_new = ffi.typeof('uint64_t[1]')
-- Constructor for complex constants (called with real and imaginary parts).
local complex    = ffi.typeof('complex')
464 | 
-- Build an 8-byte value from two 32-bit halves: lo is copied to byte offset 0
-- and hi to byte offset 4 of a fresh cdata_new() cell, whose first element is
-- returned.
local function dword_new_u32(cdata_new, lo, hi)
    local cell = cdata_new()
    local raw = ffi.cast('uint8_t*', cell)
    local lo_word = uint32_new(lo)
    local hi_word = uint32_new(hi)
    ffi.copy(raw, lo_word, 4)
    ffi.copy(raw + 4, hi_word, 4)
    return cell[0]
end
473 | 
-- Read one template-table key or value and report it to the target through
-- its optional "ktabk" method, tagged "string", "int", "num" or "pri".
local function bcread_ktabk(ls, target)
    local tp = bcread_uleb128(ls)
    if tp == BCDUMP_KTAB_INT then
        local n = bcread_uleb128(ls)
        action(target, "ktabk", ls, "int", n)
    elseif tp == BCDUMP_KTAB_NUM then
        -- A double stored as two 32-bit halves, low half first.
        local lo = bcread_uleb128(ls)
        local hi = bcread_uleb128(ls)
        action(target, "ktabk", ls, "num", dword_new_u32(double_new, lo, hi))
    elseif tp >= BCDUMP_KTAB_STR then
        -- The tag carries the string length.
        local str = bcread_mem(ls, tp - BCDUMP_KTAB_STR)
        action(target, "ktabk", ls, "string", str)
    else
        assert(tp <= BCDUMP_KTAB_TRUE)
        action(target, "ktabk", ls, "pri", tp)
    end
end
493 | 
-- Read a template table: the array and hash sizes, then narray values followed
-- by nhash key/value pairs. Always returns -1 as the placeholder value for the
-- table constant.
local function bcread_ktab(ls, target)
    local narray = bcread_uleb128(ls)
    local nhash = bcread_uleb128(ls)
    action(target, "ktab_dim", ls, narray, nhash)
    local remaining = narray
    while remaining >= 1 do
        bcread_ktabk(ls, target)
        remaining = remaining - 1
    end
    remaining = nhash
    while remaining >= 1 do
        bcread_ktabk(ls, target)   -- key
        bcread_ktabk(ls, target)   -- value
        remaining = remaining - 1
    end
    return -1
end
507 | 
-- Read sizekgc GC constants and report each to the target through its optional
-- "kgc" method. Strings are passed as Lua strings, template tables as the
-- placeholder returned by bcread_ktab, 64-bit integers and complex numbers as
-- cdata, and child prototypes as the number 0.
local function bcread_kgc(ls, target, sizekgc)
    action(target, "enter_kgc", ls)
    for i = 1, sizekgc do
        local tp = bcread_uleb128(ls)
        if tp >= BCDUMP_KGC_STR then
            -- The tag carries the string length.
            local str = bcread_mem(ls, tp - BCDUMP_KGC_STR)
            action(target, "kgc", ls, i, str)
        elseif tp == BCDUMP_KGC_TAB then
            local placeholder = bcread_ktab(ls, target)
            action(target, "kgc", ls, i, placeholder)
        elseif tp == BCDUMP_KGC_CHILD then
            action(target, "kgc", ls, i, 0)
        else
            local lo0 = bcread_uleb128(ls)
            local hi0 = bcread_uleb128(ls)
            if tp == BCDUMP_KGC_COMPLEX then
                local lo1 = bcread_uleb128(ls)
                local hi1 = bcread_uleb128(ls)
                local re = dword_new_u32(double_new, lo0, hi0)
                local im = dword_new_u32(double_new, lo1, hi1)
                action(target, "kgc", ls, i, complex(re, im))
            else
                local ctor = uint64_new
                if tp == BCDUMP_KGC_I64 then ctor = int64_new end
                action(target, "kgc", ls, i, dword_new_u32(ctor, lo0, hi0))
            end
        end
    end
end
536 | 
-- Read sizekn numeric constants and report each to the target through its
-- optional "knum" method. The lowest bit of the first byte selects the form:
-- set means a double split in two 32-bit halves, clear means an integer.
local function bcread_knum(ls, target, sizekn)
    action(target, "enter_knum", ls)
    for i = 1, sizekn do
        -- Peek at the tag bit before bcread_uleb128_33 shifts it away.
        local is_double = band(byte(ls), 1) ~= 0
        local lo = bcread_uleb128_33(ls)
        if not is_double then
            action(target, "knum", ls, i, "int", lo)
        else
            local hi = bcread_uleb128(ls)
            action(target, "knum", ls, i, "num", dword_new_u32(double_new, lo, hi))
        end
    end
end
551 | 
-- Read the line number of every instruction (sizebc - 1 entries) and report
-- firstline + delta to the target through its optional "lineinfo" method.
-- The width of each stored delta depends on numlines: one byte below 256, two
-- bytes below 65536, four bytes otherwise. sizedbg is accepted but not used.
local function bcread_lineinfo(ls, target, firstline, numlines, sizebc, sizedbg)
    local read_delta
    if numlines < 256 then
        read_delta = bcread_byte
    elseif numlines < 65536 then
        read_delta = bcread_uint16
    else
        read_delta = bcread_uint32
    end
    for pc = 1, sizebc - 1 do
        local delta = read_delta(ls)
        action(target, "lineinfo", ls, pc, firstline + delta)
    end
end
570 | 
-- Read sizeuv NUL-terminated upvalue names and report each to the target
-- through its optional "uvinfo" method.
local function bcread_uvinfo(ls, target, sizeuv)
    local index = 1
    while index <= sizeuv do
        local uvname = bcread_string(ls)
        action(target, "uvinfo", ls, index, uvname)
        index = index + 1
    end
end
577 | 
-- Names of the internal variables that the debug section stores as a single
-- byte code (1..6) instead of a string; code 0 ends the variable list (see
-- bcread_varinfo).
local VARNAME = {
  "(for index)", "(for limit)", "(for step)", "(for generator)",
  "(for state)", "(for control)"
}
582 | 
-- Read the variable records of the debug section until the terminating zero
-- byte. Each record is a name (a one-byte code below #VARNAME + 1, or a
-- NUL-terminated string) followed by two ULEB128 deltas: the start pc relative
-- to the previous record's start pc, and the end pc relative to the start pc.
-- Each record is reported through the target's optional "varinfo" method.
local function bcread_varinfo(ls, target)
    local name_code_limit = #VARNAME + 1
    local previous_start = 0
    while true do
        local code = byte(ls)
        local name
        if code < name_code_limit then
            bcread_byte(ls)
            if code == 0 then
                return
            end
            name = VARNAME[code]
        else
            name = bcread_string(ls)
        end
        local startpc = previous_start + bcread_uleb128(ls)
        local endpc = startpc + bcread_uleb128(ls)
        action(target, "varinfo", ls, name, startpc, endpc)
        previous_start = startpc
    end
end
601 | 
-- Read the debug section of a prototype. Its three parts are read in the
-- order they are stored: per-instruction line info, upvalue names, then
-- variable records.
local function bcread_dbg(ls, target, firstline, numlines, sizebc, sizeuv, sizedbg)
    action(target, "enter_debug", ls)
    bcread_lineinfo(ls, target, firstline, numlines, sizebc, sizedbg)
    bcread_uvinfo(ls, target, sizeuv)
    bcread_varinfo(ls, target)
end
608 | 
-- This function returns the object used as the target of the bcread_* routines
-- during the first pass over a prototype. Its role is to collect information
-- about kgc, knum, uv, jump targets, etc.
-- The information is stored in the "proto" object (target.proto) and is used
-- by the "printer" object in the second pass.
local function proto_info_target(target)
    local proto = target.proto
    local function knum(_, ls, i, tag, value)
        proto.knum[i] = value
    end
    local function kgc(_, ls, i, value)
        -- bcread_kgc reports a child prototype as the plain number 0. The type
        -- check matters: a 64-bit integer cdata constant equal to zero also
        -- compares == 0 and must be stored as is, not replaced by a child.
        if type(value) == "number" and value == 0 then
            value = table.remove(target.childs)
        end
        proto.kgc[i] = value
    end
    local function uv(_, ls, i, value)
        proto.uv[i] = value
    end
    local function lineinfo(_, ls, pc, line)
        proto.lineinfo[pc] = line
    end
    local function uvinfo(_, ls, i, name)
        proto.uvinfo[i] = name
    end
    local function varinfo(_, ls, name, startpc, endpc)
        proto.varinfo[#proto.varinfo + 1] = {name, startpc, endpc}
    end
    local function enter_bytecode()
        -- Set of instruction indexes that are the destination of some jump.
        proto.target = {}
    end
    local function ins(_, ls, pc, ins, m)
        -- Operand mode C == 13 is 'jump'; the destination is encoded in the
        -- upper 16 bits, biased by 0x7fff.
        if band(m, 15*128) == 13*128 then proto.target[pc+shr(ins, 16)-0x7fff] = true end
    end
    return {
        knum = knum, kgc = kgc, uv = uv,
        lineinfo = lineinfo, uvinfo = uvinfo, varinfo = varinfo,
        enter_bytecode = enter_bytecode, ins = ins,
    }
end
649 | 
-- Read one prototype from the dump and report every decoded item to target.
-- Returns the prototype record (also stored in target.proto), or nil when the
-- end-of-dump marker (a zero byte) or a zero prototype length is read.
-- The prototype body is read twice: a first silent pass fills the record via
-- proto_info_target, then the position is rewound and the body is read again
-- with the real target, which can now rely on the complete record.
local function bcread_proto(ls, target)
    -- A zero byte where a prototype length is expected marks the end.
    if ls.n > 0 and byte(ls) == 0 then
        bcread_byte(ls)
        action(target, "eof", ls)
        return nil
    end
    action(target, "enter_proto", ls)
    local proto = proto_new(chunkname_strip(target.chunkname))
    target.proto = proto
    local len = bcread_uleb128(ls)
    -- Remaining byte count right after the length field; used by the final
    -- size check.
    local startn = ls.n
    action(target, "proto_len", ls, len)
    if len == 0 then return nil end
    bcread_need(ls, len)

    -- Read prototype header.
    local flags = bcread_byte(ls)
    action(target, "proto_flags", ls, flags)
    local numparams = bcread_byte(ls)
    action(target, "proto_numparams", ls, numparams)
    local framesize = bcread_byte(ls)
    action(target, "proto_framesize", ls, framesize)
    local sizeuv = bcread_byte(ls)
    local sizekgc = bcread_uleb128(ls)
    local sizekn = bcread_uleb128(ls)
    -- The stored instruction count is one less than sizebc; the readers below
    -- iterate over sizebc - 1 instructions.
    local sizebc = bcread_uleb128(ls) + 1
    action(target, "proto_sizes", ls, sizeuv, sizekgc, sizekn, sizebc)

    -- Debug sizes and line range are present only when the dump is not stripped.
    local sizedbg, firstline, numlines = 0, 0, 0
    if band(ls.flags, BCDUMP.F_STRIP) == 0 then
        sizedbg = bcread_uleb128(ls)
        action(target, "proto_debug_size", ls, sizedbg)
        if sizedbg > 0 then
            firstline = bcread_uleb128(ls)
            numlines = bcread_uleb128(ls)
            proto.firstline, proto.numlines = firstline, numlines
            action(target, "proto_lines", ls, firstline, numlines)
        end
    end

    -- First pass: collect constants, names and jump targets into proto, then
    -- rewind to the start of the body.
    local info = proto_info_target(target)
    if info then
        local save = save_position(ls)
        bcread_bytecode(ls, info, sizebc)
        bcread_uv(ls, info, sizeuv)
        bcread_kgc(ls, info, sizekgc)
        bcread_knum(ls, info, sizekn)
        if sizedbg > 0 then
            bcread_dbg(ls, info, firstline, numlines, sizebc, sizeuv, sizedbg)
        end
        restore_position(ls, save)
    end

    -- Second pass: same sections, same order, reported to the real target.
    bcread_bytecode(ls, target, sizebc)
    bcread_uv(ls, target, sizeuv)
    bcread_kgc(ls, target, sizekgc)
    bcread_knum(ls, target, sizekn)
    if sizedbg > 0 then
        bcread_dbg(ls, target, firstline, numlines, sizebc, sizeuv, sizedbg)
    end

    -- The bytes consumed since the length field must equal the declared length.
    assert(len == startn - ls.n, "prototype bytecode size mismatch")
    return target.proto
end
714 | 
-- Read the dump header that follows the first signature byte: the two
-- remaining signature bytes, the version, the flags and (unless the dump is
-- stripped) the chunk name. Stores the flags in ls.flags and the chunk name in
-- target.chunkname. Raises "invalid header" or "unknown flags" on mismatch.
local function bcread_header(ls, target)
    local expected = { BCDUMP.HEAD2, BCDUMP.HEAD3, BCDUMP.VERSION }
    for i = 1, #expected do
        if bcread_byte(ls) ~= expected[i] then
            error("invalid header")
        end
    end
    action(target, "header", ls)
    local flags = bcread_uleb128(ls)
    ls.flags = flags
    action(target, "flags", ls, flags)
    if band(flags, bnot(BCDUMP.F_KNOWN)) ~= 0 then
        error("unknown flags")
    end
    if band(flags, BCDUMP.F_STRIP) ~= 0 then
        return
    end
    local len = bcread_uleb128(ls)
    bcread_need(ls, len)
    target.chunkname = bcread_mem(ls, len)
    action(target, "set_chunkname", ls, target.chunkname)
end
733 | 
-- The "printer" object is used to pretty-print the bytecode's hex dump side by
-- side with the decoded meaning of each chunk of bytes.
-- The bcread_* routines read the bytecode and call the appropriate "printer"
-- method with the decoded information. In turn, the "printer" method writes
-- the bytes and the information to its output.
-- The "printer" object assumes that a "proto" field is available with some of
-- the prototype's information. The required information includes kgc, knum,
-- uv, debug names and line numbers.

local Printer = { }

-- Names of the primitive template-table values, indexed by tag + 1
-- (BCDUMP_KTAB_NIL = 0, BCDUMP_KTAB_FALSE = 1, BCDUMP_KTAB_TRUE = 2).
local KTAB_PRI_NAMES = { "nil", "false", "true" }

-- The chunk name is passed as a format argument, never as part of the format
-- itself, so a "%" inside the name is printed literally.
function Printer:set_chunkname(ls, chunkname)
    log(self.out, ls, "Chunkname: %s", chunkname)
end

function Printer:enter_proto(ls)
    log(self.out, ls, ".. prototype ..")
end

function Printer:header(ls) log(self.out, ls, "Header LuaJIT 2.0 BC") end
function Printer:flags(ls, flags) log(self.out, ls, "Flags: %s", flags_string(flags)) end
function Printer:enter_kgc(ls) log(self.out, ls, ".. kgc ..") end
function Printer:enter_knum(ls) log(self.out, ls, ".. knum ..") end
function Printer:enter_bytecode(ls) log(self.out, ls, ".. bytecode ..") end
function Printer:enter_uv(ls) log(self.out, ls, ".. uv ..") end
function Printer:enter_debug(ls) log(self.out, ls, ".. debug ..") end
function Printer:eof(ls) log(self.out, ls, "eof") end
function Printer:proto_flags(ls, flags) log(self.out, ls, "prototype flags %s", proto_flags_string(flags)) end
function Printer:proto_len(ls, len) log(self.out, ls, "prototype length %d", len) end
function Printer:proto_numparams(ls, numparams) log(self.out, ls, "parameters number %d", numparams) end
function Printer:proto_framesize(ls, framesize) log(self.out, ls, "framesize %d", framesize) end
function Printer:proto_sizes(ls, sizeuv, sizekgc, sizekn, sizebc) log(self.out, ls, "size uv: %d kgc: %d kn: %d bc: %d", sizeuv, sizekgc, sizekn, sizebc) end
function Printer:proto_debug_size(ls, sizedbg) log(self.out, ls, "debug size %d", sizedbg) end

function Printer:proto_lines(ls, firstline, numlines)
    log(self.out, ls, "firstline: %d numline: %d", firstline, numlines)
end

-- One instruction; "=>" marks instructions that are the target of a jump.
function Printer:ins(ls, pc, ins, m)
    local s = bcline(self.proto, pc, ins, m, self.proto.target[pc] and "=>")
    log(self.out, ls, "%s", s)
end

function Printer:knum(ls, i, tag, num)
    log(self.out, ls, "knum %s: %g", tag, num)
end

-- One GC constant. A child prototype arrives as the plain number 0 and is
-- looked up in self.proto.kgc. The type check keeps a 64-bit integer cdata
-- equal to zero (which also compares == 0) on the tostring path.
function Printer:kgc(ls, i, value)
    local str
    if type(value) == "string" then
        str = format("%q", value)
    elseif type(value) == "number" and value == 0 then
        local pt = self.proto.kgc[i]
        str = format("<function: %s:%d>", pt.filename, pt.firstline)
    else
        str = tostring(value)
    end
    log(self.out, ls, "kgc: %s", str)
end

function Printer:ktab_dim(ls, narray, nhash)
    log(self.out, ls, "ktab narray: %d nhash: %d", narray, nhash)
end

-- One template-table key or value. Primitive tags are 0-based while the name
-- list is 1-based, hence value + 1.
function Printer:ktabk(ls, tag, value)
    local s
    if tag == "string" then
        s = format("%q", value)
    elseif tag == "pri" then
        s = KTAB_PRI_NAMES[value + 1] or tostring(value)
    else
        s = tostring(value)
    end
    log(self.out, ls, "ktabk %s: %s", tag, s)
end

function Printer:uv(ls, i, value)
    local uv, islocal, imm = uv_decode(value)
    if islocal then
        log(self.out, ls, "upvalue %slocal %d", imm and "(const) " or "", uv)
    else
        log(self.out, ls, "upvalue upper %d", uv)
    end
end

function Printer:lineinfo(ls, pc, line)
    log(self.out, ls, "pc%03d: line %d", pc, line)
end

function Printer:uvinfo(ls, i, name)
    log(self.out, ls, "uv%d: name: %s", i - 1, name)
end

function Printer:varinfo(ls, name, startpc, endpc)
    log(self.out, ls, "var: %s pc: %d - %d", name, startpc, endpc)
end
824 | 
-- The BCList object prints the bytecode instructions the way
-- "luajit -bl" does: a header line per prototype, one line per instruction
-- and a blank line after the instructions.
local BCList = { }

function BCList:enter_bytecode()
    local proto = self.proto
    local lastline = proto.firstline + proto.numlines
    local header = format("-- BYTECODE -- %s:%d-%d\n", proto.filename, proto.firstline, lastline)
    self.out:write(header)
end

function BCList:ins(ls, pc, ins, m)
    -- "=>" marks instructions that are the target of a jump.
    local marker = self.proto.target[pc] and "=>"
    local line = bcline(self.proto, pc, ins, m, marker)
    self.out:write(line)
    self.out:write("\n")
end

function BCList:enter_uv()
    self.out:write("\n")
end
841 | 
-- Create a target object that writes to output, looks its methods up in class
-- and starts with an empty list of child prototypes.
local function printer_new(output, class, chunkname)
    local meta = { __index = class }
    local instance = { out = output, childs = {}, chunkname = chunkname }
    return setmetatable(instance, meta)
end
846 | 
-- Decode the bytecode dump held in string s and write a listing to output.
-- With hexdump set, the Printer target is used (hex bytes next to their
-- decoded meaning); otherwise the BCList target is used.
-- Returns the string "invalid header beginning char" when the first byte is
-- not the expected signature byte; every other failure is raised as an error.
local function bcread(s, output, chunkname, hexdump)
    local ls = {data = s, n = #s, p = 1, bytes = {}}
    local class = hexdump and Printer or BCList
    local printer = printer_new(output, class, chunkname)
    if bcread_byte(ls) ~= BCDUMP.HEAD1 then
        return "invalid header beginning char"
    end
    bcread_header(ls, printer)
    -- Each prototype read is appended to printer.childs, from which a later
    -- prototype claims its children; bcread_proto returns nil at the end.
    while true do
        local pt = bcread_proto(ls, printer)
        printer.childs[#printer.childs + 1] = pt
        if not pt then break end
    end
    if ls.n > 0 then
        error("spurious bytecode")
    end
end
862 | 
-- Module interface: `dump` is the bcread function.
return { dump = bcread }
864 | 


--------------------------------------------------------------------------------
/bcsave.lua:
--------------------------------------------------------------------------------
  1 | --
  2 | -- LuaJIT Language Toolkit.
  3 | --
  4 | -- Copyright (C) 2013-2014 Francesco Abbate. All rights reserved.
  5 | --
  6 | -- See Copyright Notice in LICENSE
  7 | --
  8 | ----------------------------------------------------------------------------
  9 | --
 10 | -- Major portions taken verbatim or adapted from
 11 | -- LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/
 12 | -- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
 13 | --
 14 | -- LuaJIT module to save/list bytecode.
 15 | --
 16 | -- Copyright (C) 2005-2014 Mike Pall. All rights reserved.
 17 | -- Released under the MIT license. See Copyright Notice in luajit.h
 18 | ----------------------------------------------------------------------------
 19 | --
 20 | -- This module saves or lists the bytecode for an input file.
 21 | -- It's run by the -b command line option.
 22 | --
 23 | ------------------------------------------------------------------------------
 24 | 
 25 | local jit = require("jit")
 26 | local bit = require("bit")
 27 | 
 28 | -- Symbol name prefix for LuaJIT bytecode.
 29 | local LJBC_PREFIX = "luaJIT_BC_"
 30 | 
 31 | ------------------------------------------------------------------------------
 32 | 
 33 | local function usage()
 34 |     io.stderr:write[[
 35 | Save LuaJIT bytecode using the language toolkit.
 36 | Usage: luajit-x -b[options] input output
 37 |   -l        Only list bytecode.
 38 |   -x        Only list bytecode with extended informations.
 39 |   -s        Strip debug info (default).
 40 |   -g        Keep debug info.
 41 |   -n name   Set module name (default: auto-detect from input name).
 42 |   -t type   Set output file type (default: auto-detect from output name).
 43 |   -a arch   Override architecture for object files (default: native).
 44 |   -o os     Override OS for object files (default: native).
 45 |   -e chunk  Use chunk string as input.
 46 |   --        Stop handling options.
 47 |   -         Use stdin as input and/or stdout as output.
 48 | 
 49 | File types: c h obj o raw (default)
 50 | ]]
 51 |     os.exit(1)
 52 | end
 53 | 
 54 | local function check(ok, ...)
 55 |     if ok then return ok, ... end
 56 |     io.stderr:write("luajit lang toolkit: ", ...)
 57 |     io.stderr:write("\n")
 58 |     os.exit(1)
 59 | end
 60 | 
 61 | local function savefile(name, mode)
 62 |     if name == "-" then return io.stdout end
 63 |     return check(io.open(name, mode))
 64 | end
 65 | 
 66 | ------------------------------------------------------------------------------
 67 | 
 68 | local map_type = {
 69 |     raw = "raw", c = "c", h = "h", o = "obj", obj = "obj",
 70 | }
 71 | 
 72 | local map_arch = {
 73 |     x86 = true, x64 = true, arm = true, ppc = true, ppcspe = true,
 74 |     mips = true, mipsel = true,
 75 | }
 76 | 
 77 | local map_os = {
 78 |     linux = true, windows = true, osx = true, freebsd = true, netbsd = true,
 79 |     openbsd = true, solaris = true,
 80 | }
 81 | 
 82 | local function checkarg(str, map, err)
 83 |     str = string.lower(str)
 84 |     local s = check(map[str], "unknown ", err)
 85 |     return s == true and str or s
 86 | end
 87 | 
 88 | local function detecttype(str)
 89 |     local ext = string.match(string.lower(str), "%.(%a+)$")
 90 |     return map_type[ext] or "raw"
 91 | end
 92 | 
 93 | local function checkmodname(str)
 94 |     check(string.match(str, "^[%w_.%-]+$"), "bad module name")
 95 |     return string.gsub(str, "[%.%-]", "_")
 96 | end
 97 | 
 98 | local function detectmodname(str)
 99 |     if type(str) == "string" then
100 |         local tail = string.match(str, "[^/\\]+$")
101 |         if tail then str = tail end
102 |         local head = string.match(str, "^(.*)%.[^.]*$")
103 |         if head then str = head end
104 |         str = string.match(str, "^[%w_.%-]+")
105 |     else
106 |         str = nil
107 |     end
108 |     check(str, "cannot derive module name, use -n name")
109 |     return string.gsub(str, "[%.%-]", "_")
110 | end
111 | 
112 | ------------------------------------------------------------------------------
113 | 
-- Write the final chunk of data and finish the output.
--   fp     : open output handle
--   output : output name; "-" means stdout, which is left open
--   s      : string to write
-- A failing write or close is reported through `check`.
local function bcsave_tail(fp, output, s)
    local written, reason = fp:write(s)
    local to_file = (output ~= "-")
    if written and to_file then
        written, reason = fp:close()
    end
    check(written, "cannot write ", output, ": ", reason)
end
119 | 
-- Save the bytecode string unchanged, opening the output in binary mode.
local function bcsave_raw(output, s)
    bcsave_tail(savefile(output, "wb"), output, s)
end
124 | 
-- Save the bytecode as C source (ctx.type == "c") or as a C header.
--   ctx    : context table; `type` and `modname` are read
--   output : output file name ("-" for stdout)
--   s      : bytecode string
-- The bytes are emitted as a comma separated list of decimal values, wrapped
-- so that no line of values grows beyond 78 characters.
local function bcsave_c(ctx, output, s)
    local fp = savefile(output, "w")
    if ctx.type == "c" then
        -- The macro predefined by C++ compilers is `__cplusplus` (two leading
        -- underscores). With a single underscore the guard never fires and
        -- the symbol is not declared extern "C" in C++ translation units.
        fp:write(string.format([[
#ifdef __cplusplus
extern "C"
#endif
#ifdef _WIN32
__declspec(dllexport)
#endif
const char %s%s[] = {
]], LJBC_PREFIX, ctx.modname))
    else
        fp:write(string.format([[
#define %s%s_SIZE %d
static const char %s%s[] = {
]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname))
    end
    -- t[1..n] holds the values of the current line, m its length so far.
    local t, n, m = {}, 0, 0
    for i=1,#s do
        local b = tostring(string.byte(s, i))
        m = m + #b + 1
        if m > 78 then
            -- Flush the pending line before it would get too long.
            fp:write(table.concat(t, ",", 1, n), ",\n")
            n, m = 0, #b + 1
        end
        n = n + 1
        t[n] = b
    end
    bcsave_tail(fp, output, table.concat(t, ",", 1, n).."\n};\n")
end
156 | 
-- Save the bytecode as an ELF relocatable object file.
--   ctx    : context table; `arch`, `os` and `modname` are read
--   output : output file name ("-" for stdout)
--   s      : bytecode string, appended after the headers
--   ffi    : the LuaJIT FFI library
-- The object contains one global data symbol named LJBC_PREFIX..modname.
local function bcsave_elfobj(ctx, output, s, ffi)
    ffi.cdef[[
typedef struct {
    uint8_t emagic[4], eclass, eendian, eversion, eosabi, eabiversion, epad[7];
    uint16_t type, machine;
    uint32_t version;
    uint32_t entry, phofs, shofs;
    uint32_t flags;
    uint16_t ehsize, phentsize, phnum, shentsize, shnum, shstridx;
} ELF32header;
typedef struct {
    uint8_t emagic[4], eclass, eendian, eversion, eosabi, eabiversion, epad[7];
    uint16_t type, machine;
    uint32_t version;
    uint64_t entry, phofs, shofs;
    uint32_t flags;
    uint16_t ehsize, phentsize, phnum, shentsize, shnum, shstridx;
} ELF64header;
typedef struct {
    uint32_t name, type, flags, addr, ofs, size, link, info, align, entsize;
} ELF32sectheader;
typedef struct {
    uint32_t name, type;
    uint64_t flags, addr, ofs, size;
    uint32_t link, info;
    uint64_t align, entsize;
} ELF64sectheader;
typedef struct {
    uint32_t name, value, size;
    uint8_t info, other;
    uint16_t sectidx;
} ELF32symbol;
typedef struct {
    uint32_t name;
    uint8_t info, other;
    uint16_t sectidx;
    uint64_t value, size;
} ELF64symbol;
typedef struct {
    ELF32header hdr;
    ELF32sectheader sect[6];
    ELF32symbol sym[2];
    uint8_t space[4096];
} ELF32obj;
typedef struct {
    ELF64header hdr;
    ELF64sectheader sect[6];
    ELF64symbol sym[2];
    uint8_t space[4096];
} ELF64obj;
]]
    local symname = LJBC_PREFIX..ctx.modname
    local is64, isbe = false, false
    if ctx.arch == "x64" then
        is64 = true
    elseif ctx.arch == "ppc" or ctx.arch == "ppcspe" or ctx.arch == "mips" then
        isbe = true
    end

    -- Handle different host/target endianness: f16/f32 convert 16/32 bit
    -- fields, fofs converts offset-sized fields (32 or 64 bit wide).
    local function f32(x) return x end
    local f16, fofs = f32, f32
    if ffi.abi("be") ~= isbe then
        f32 = bit.bswap
        function f16(x) return bit.rshift(bit.bswap(x), 16) end
        if is64 then
            local two32 = ffi.cast("int64_t", 2^32)
            function fofs(x) return bit.bswap(x)*two32 end
        else
            fofs = f32
        end
    end

    -- Create ELF object and fill in header.
    local o = ffi.new(is64 and "ELF64obj" or "ELF32obj")
    local hdr = o.hdr
    if ctx.os == "bsd" or ctx.os == "other" then -- Determine native hdr.eosabi.
        local bf = assert(io.open("/bin/ls", "rb"))
        local bs = bf:read(9)
        bf:close()
        ffi.copy(o, bs, 9)
        check(hdr.emagic[0] == 127, "no support for writing native object files")
    else
        hdr.emagic = "\127ELF"
        hdr.eosabi = ({ freebsd=9, netbsd=2, openbsd=12, solaris=6 })[ctx.os] or 0
    end
    hdr.eclass = is64 and 2 or 1
    hdr.eendian = isbe and 2 or 1
    hdr.eversion = 1
    hdr.type = f16(1)
    hdr.machine = f16(({ x86=3, x64=62, arm=40, ppc=20, ppcspe=20, mips=8, mipsel=8 })[ctx.arch])
    if ctx.arch == "mips" or ctx.arch == "mipsel" then
        -- Like every other multi-byte header field, the flags word must be
        -- stored in the target byte order, not the host's.
        hdr.flags = f32(0x50001006)
    end
    hdr.version = f32(1)
    hdr.shofs = fofs(ffi.offsetof(o, "sect"))
    hdr.ehsize = f16(ffi.sizeof(hdr))
    hdr.shentsize = f16(ffi.sizeof(o.sect[0]))
    hdr.shnum = f16(6)
    hdr.shstridx = f16(2)

    -- Fill in sections and symbols.
    -- `ofs` is the running write position inside o.space; section names are
    -- stored there first (section 0 is left zeroed).
    local sofs, ofs = ffi.offsetof(o, "space"), 1
    for i,name in ipairs{
            ".symtab", ".shstrtab", ".strtab", ".rodata", ".note.GNU-stack",
        } do
        local sect = o.sect[i]
        sect.align = fofs(1)
        sect.name = f32(ofs)
        ffi.copy(o.space+ofs, name)
        ofs = ofs + #name+1
    end
    o.sect[1].type = f32(2) -- .symtab
    o.sect[1].link = f32(3)
    o.sect[1].info = f32(1)
    o.sect[1].align = fofs(8)
    o.sect[1].ofs = fofs(ffi.offsetof(o, "sym"))
    o.sect[1].entsize = fofs(ffi.sizeof(o.sym[0]))
    o.sect[1].size = fofs(ffi.sizeof(o.sym))
    o.sym[1].name = f32(1)
    o.sym[1].sectidx = f16(4)
    o.sym[1].size = fofs(#s)
    o.sym[1].info = 17
    o.sect[2].type = f32(3) -- .shstrtab
    o.sect[2].ofs = fofs(sofs)
    o.sect[2].size = fofs(ofs)
    o.sect[3].type = f32(3) -- .strtab
    o.sect[3].ofs = fofs(sofs + ofs)
    o.sect[3].size = fofs(#symname+1)
    ffi.copy(o.space+ofs+1, symname)
    ofs = ofs + #symname + 2
    o.sect[4].type = f32(1) -- .rodata
    o.sect[4].flags = fofs(2)
    o.sect[4].ofs = fofs(sofs + ofs)
    o.sect[4].size = fofs(#s)
    o.sect[5].type = f32(1) -- .note.GNU-stack
    o.sect[5].ofs = fofs(sofs + ofs + #s)

    -- Write ELF object file: headers plus the used part of o.space, then
    -- the bytecode itself.
    local fp = savefile(output, "wb")
    fp:write(ffi.string(o, ffi.sizeof(o)-4096+ofs))
    bcsave_tail(fp, output, s)
end
300 | 
-- Save the bytecode as a PE/COFF object file.
--   ctx    : context table; `arch` and `modname` are read
--   output : output file name ("-" for stdout)
--   s      : bytecode string, appended after the headers
--   ffi    : the LuaJIT FFI library
-- The object has a ".drectve" section carrying an /EXPORT directive for the
-- data symbol and a ".rdata" section holding the bytecode.
local function bcsave_peobj(ctx, output, s, ffi)
    ffi.cdef[[
typedef struct {
    uint16_t arch, nsects;
    uint32_t time, symtabofs, nsyms;
    uint16_t opthdrsz, flags;
} PEheader;
typedef struct {
    char name[8];
    uint32_t vsize, vaddr, size, ofs, relocofs, lineofs;
    uint16_t nreloc, nline;
    uint32_t flags;
} PEsection;
typedef struct __attribute((packed)) {
    union {
        char name[8];
        uint32_t nameref[2];
    };
    uint32_t value;
    int16_t sect;
    uint16_t type;
    uint8_t scl, naux;
} PEsym;
typedef struct __attribute((packed)) {
    uint32_t size;
    uint16_t nreloc, nline;
    uint32_t cksum;
    uint16_t assoc;
    uint8_t comdatsel, unused[3];
} PEsymaux;
typedef struct {
    PEheader hdr;
    PEsection sect[2];
    // Must be an even number of symbol structs.
    PEsym sym0;
    PEsymaux sym0aux;
    PEsym sym1;
    PEsymaux sym1aux;
    PEsym sym2;
    PEsym sym3;
    uint32_t strtabsize;
    uint8_t space[4096];
} PEobj;
]]
    -- On x86 the symbol name gets a leading underscore.
    local symname = LJBC_PREFIX..ctx.modname
    if ctx.arch == "x86" then
        symname = "_"..symname
    end
    local directive = "   /EXPORT:"..symname..",DATA "

    -- The file format is always little-endian. Swap if the host is big-endian.
    local host_is_be = ffi.abi("be")
    local function f32(x)
        if host_is_be then return bit.bswap(x) end
        return x
    end
    local function f16(x)
        if host_is_be then return bit.rshift(bit.bswap(x), 16) end
        return x
    end

    -- Create PE object and fill in header.
    local obj = ffi.new("PEobj")
    local header = obj.hdr
    local arch_codes = {
        x86=0x14c, x64=0x8664, arm=0x1c0, ppc=0x1f2, mips=0x366, mipsel=0x366,
    }
    header.arch = f16(arch_codes[ctx.arch])
    header.nsects = f16(2)
    header.symtabofs = f32(ffi.offsetof(obj, "sym0"))
    header.nsyms = f32(6)

    local directive_len, data_len = #directive, #s

    -- Section 1 and its section symbol: the linker directive.
    local drectve = obj.sect[0]
    drectve.name = ".drectve"
    drectve.size = f32(directive_len)
    drectve.flags = f32(0x00100a00)
    obj.sym0.sect = f16(1)
    obj.sym0.scl = 3
    obj.sym0.name = ".drectve"
    obj.sym0.naux = 1
    obj.sym0aux.size = f32(directive_len)

    -- Section 2 and its section symbol: the bytecode.
    local rdata = obj.sect[1]
    rdata.name = ".rdata"
    rdata.size = f32(data_len)
    rdata.flags = f32(0x40300040)
    obj.sym1.sect = f16(2)
    obj.sym1.scl = 3
    obj.sym1.name = ".rdata"
    obj.sym1.naux = 1
    obj.sym1aux.size = f32(data_len)

    -- The exported data symbol; its name is referenced by string table offset.
    obj.sym2.sect = f16(2)
    obj.sym2.scl = 2
    obj.sym2.nameref[1] = f32(4)
    obj.sym3.sect = f16(-1)
    obj.sym3.scl = 2
    obj.sym3.value = f32(1)
    obj.sym3.name = "@feat.00" -- Mark as SafeSEH compliant.

    -- Lay out the string table entry and the directive text in obj.space.
    local space_ofs = ffi.offsetof(obj, "space")
    ffi.copy(obj.space, symname)
    local used = #symname + 1
    obj.strtabsize = f32(used + 4)
    drectve.ofs = f32(space_ofs + used)
    ffi.copy(obj.space + used, directive)
    used = used + directive_len
    rdata.ofs = f32(space_ofs + used)

    -- Write PE object file.
    local fp = savefile(output, "wb")
    fp:write(ffi.string(obj, ffi.sizeof(obj)-4096+used))
    bcsave_tail(fp, output, s)
end
407 | 
-- Save the bytecode as a Mach-O object file.
--   ctx    : context table; `arch` and `modname` are read
--   output : output file name ("-" for stdout)
--   s      : bytecode string, appended after the headers
--   ffi    : the LuaJIT FFI library
-- x86 and x64 produce a single-architecture object, arm produces a FAT
-- object with four slices. Other architectures are rejected through `check`.
local function bcsave_machobj(ctx, output, s, ffi)
    ffi.cdef[[
typedef struct
{
    uint32_t magic, cputype, cpusubtype, filetype, ncmds, sizeofcmds, flags;
} mach_header;
typedef struct
{
    mach_header; uint32_t reserved;
} mach_header_64;
typedef struct {
    uint32_t cmd, cmdsize;
    char segname[16];
    uint32_t vmaddr, vmsize, fileoff, filesize;
    uint32_t maxprot, initprot, nsects, flags;
} mach_segment_command;
typedef struct {
    uint32_t cmd, cmdsize;
    char segname[16];
    uint64_t vmaddr, vmsize, fileoff, filesize;
    uint32_t maxprot, initprot, nsects, flags;
} mach_segment_command_64;
typedef struct {
    char sectname[16], segname[16];
    uint32_t addr, size;
    uint32_t offset, align, reloff, nreloc, flags;
    uint32_t reserved1, reserved2;
} mach_section;
typedef struct {
    char sectname[16], segname[16];
    uint64_t addr, size;
    uint32_t offset, align, reloff, nreloc, flags;
    uint32_t reserved1, reserved2, reserved3;
} mach_section_64;
typedef struct {
    uint32_t cmd, cmdsize, symoff, nsyms, stroff, strsize;
} mach_symtab_command;
typedef struct {
    int32_t strx;
    uint8_t type, sect;
    int16_t desc;
    uint32_t value;
} mach_nlist;
typedef struct {
    uint32_t strx;
    uint8_t type, sect;
    uint16_t desc;
    uint64_t value;
} mach_nlist_64;
typedef struct
{
    uint32_t magic, nfat_arch;
} mach_fat_header;
typedef struct
{
    uint32_t cputype, cpusubtype, offset, size, align;
} mach_fat_arch;
typedef struct {
    struct {
        mach_header hdr;
        mach_segment_command seg;
        mach_section sec;
        mach_symtab_command sym;
    } arch[1];
    mach_nlist sym_entry;
    uint8_t space[4096];
} mach_obj;
typedef struct {
    struct {
        mach_header_64 hdr;
        mach_segment_command_64 seg;
        mach_section_64 sec;
        mach_symtab_command sym;
    } arch[1];
    mach_nlist_64 sym_entry;
    uint8_t space[4096];
} mach_obj_64;
typedef struct {
    mach_fat_header fat;
    mach_fat_arch fat_arch[4];
    struct {
        mach_header hdr;
        mach_segment_command seg;
        mach_section sec;
        mach_symtab_command sym;
    } arch[4];
    mach_nlist sym_entry;
    uint8_t space[4096];
} mach_fat_obj;
]]
    local symname = '_'..LJBC_PREFIX..ctx.modname
    local arch = ctx.arch
    local isfat, is64, align, mobj = false, false, 4, "mach_obj"
    if arch == "x64" then
        is64, align, mobj = true, 8, "mach_obj_64"
    elseif arch == "arm" then
        isfat, mobj = true, "mach_fat_obj"
    else
        check(arch == "x86", "unsupported architecture for OSX")
    end
    -- Round v up to a multiple of a (a must be a power of two).
    local function aligned(v, a) return bit.band(v+a-1, -a) end
    local be32 = bit.bswap -- Mach-O FAT is BE, supported archs are LE.

    -- Create Mach-O object and fill in header.
    local obj = ffi.new(mobj)
    local mach_size = aligned(ffi.offsetof(obj, "space")+#symname+2, align)
    local cputypes = ({ x86={7}, x64={0x01000007}, arm={7,12,12,12} })[arch]
    local cpusubtypes = ({ x86={3}, x64={3}, arm={3,6,9,11} })[arch]
    local nslices = #cpusubtypes
    if isfat then
        obj.fat.magic = be32(0xcafebabe)
        obj.fat.nfat_arch = be32(nslices)
    end

    local data_size = #s
    local symtab_ofs = ffi.offsetof(obj, "sym_entry")
    local strtab_ofs = symtab_ofs + ffi.sizeof(obj.sym_entry)

    -- Fill in sections and symbols, one header set per slice.
    for i = 0, nslices-1 do
        -- File offset at which this slice starts (0 for non-FAT objects).
        local base = 0
        if isfat then
            local fa = obj.fat_arch[i]
            fa.cputype = be32(cputypes[i+1])
            fa.cpusubtype = be32(cpusubtypes[i+1])
            -- Subsequent slices overlap each other to share data.
            base = ffi.offsetof(obj, "arch") + i*ffi.sizeof(obj.arch[0])
            fa.offset = be32(base)
            fa.size = be32(mach_size-base+data_size)
        end
        local slice = obj.arch[i]
        local hdr, seg, sec, sym = slice.hdr, slice.seg, slice.sec, slice.sym

        hdr.magic = is64 and 0xfeedfacf or 0xfeedface
        hdr.cputype = cputypes[i+1]
        hdr.cpusubtype = cpusubtypes[i+1]
        hdr.filetype = 1
        hdr.ncmds = 2
        hdr.sizeofcmds = ffi.sizeof(seg)+ffi.sizeof(sec)+ffi.sizeof(sym)

        seg.cmd = is64 and 0x19 or 0x1
        seg.cmdsize = ffi.sizeof(seg)+ffi.sizeof(sec)
        seg.vmsize = data_size
        seg.fileoff = mach_size-base
        seg.filesize = data_size
        seg.maxprot = 1
        seg.initprot = 1
        seg.nsects = 1

        ffi.copy(sec.sectname, "__data")
        ffi.copy(sec.segname, "__DATA")
        sec.size = data_size
        sec.offset = mach_size-base

        sym.cmd = 2
        sym.cmdsize = ffi.sizeof(sym)
        sym.symoff = symtab_ofs-base
        sym.nsyms = 1
        sym.stroff = strtab_ofs-base
        sym.strsize = aligned(#symname+2, align)
    end
    obj.sym_entry.type = 0xf
    obj.sym_entry.sect = 1
    obj.sym_entry.strx = 1
    ffi.copy(obj.space+1, symname)

    -- Write Mach-O object file.
    local fp = savefile(output, "wb")
    fp:write(ffi.string(obj, mach_size))
    bcsave_tail(fp, output, s)
end
568 | 
-- Save the bytecode as an object file in the format matching ctx.os:
-- PE for "windows", Mach-O for "osx", ELF for everything else.
-- Requires the FFI library; its absence is reported through `check`.
local function bcsave_obj(ctx, output, s)
    local ok, ffi = pcall(require, "ffi")
    check(ok, "FFI library required to write this file type")
    local writer = bcsave_elfobj
    if ctx.os == "windows" then
        writer = bcsave_peobj
    elseif ctx.os == "osx" then
        writer = bcsave_machobj
    end
    return writer(ctx, output, s, ffi)
end
580 | 
581 | ------------------------------------------------------------------------------
582 | 
-- Tell whether a file starts with the 4-byte signature ESC 'L' 'J' 0x01.
--   input : file name; a failure to open is reported through `check`
-- Returns true or false.
local function bc_magic_header(input)
    local f, err = io.open(input, "rb")
    check(f, "cannot open ", err)
    local magic = string.char(0x1b, 0x4c, 0x4a, 0x01)
    local header = f:read(4)
    f:close()
    return header == magic
end
591 | 
592 | 
-- Produce the bytecode string for the given input.
--   ctx   : context table; `string_input` selects whether `input` is a chunk
--           of source code or a file name
--   input : source string, file name, or "-" for stdin
-- A file that already starts with the bytecode signature is read verbatim
-- instead of being compiled. Compilation failures are reported through
-- `check`, including the message returned by the compiler.
-- Returns the bytecode string.
local function bccompile(ctx, input)
    local compile = require("sci-lang.compile")
    local ok, bcstring
    if ctx.string_input then
        ok, bcstring = compile.string(input)
        if not ok then
            -- On failure the second result is the compiler's error message;
            -- show it instead of silently dropping it.
            check(false, "cannot compile string:", input, "\n", tostring(bcstring))
        end
    else
        if input == "-" then
            ok, bcstring = compile.file()
        else
            if bc_magic_header(input) then
                local f = io.open(input, "rb")
                check(f, "cannot open file")
                ok, bcstring = true, f:read("*a")
                f:close()
            else
                ok, bcstring = compile.file(input)
            end
        end
        if not ok then
            check(false, "cannot compile file:", input, "\n", tostring(bcstring))
        end
    end
    return bcstring
end
616 | 
-- Compile the input and print a bytecode listing to the output.
-- ctx.hexdump is forwarded to the bcread module's dump function.
local function bclist(ctx, input, output)
    local s = bccompile(ctx, input)
    local lister = require("sci-lang.bcread")
    local out = savefile(output, "w")
    lister.dump(s, out, input, ctx.hexdump)
end
621 | 
-- Compile the input and save the bytecode in the requested file type.
-- When ctx.type is unset it is detected from the output name; when
-- ctx.modname is unset (and needed) it is derived from the input name.
-- Both detected values are stored back into ctx.
local function bcsave(ctx, input, output)
    -- TODO: implement the ctx.strip option
    local s = bccompile(ctx, input)
    if not ctx.type then
        ctx.type = detecttype(output)
    end
    local t = ctx.type
    if t == "raw" then
        bcsave_raw(output, s)
        return
    end
    -- Every non-raw type embeds the module name in a symbol.
    if not ctx.modname then
        ctx.modname = detectmodname(input)
    end
    if t == "obj" then
        bcsave_obj(ctx, output, s)
    else
        bcsave_c(ctx, output, s)
    end
end
641 | 
-- Process -b command line option.
-- Option arguments (those starting with "-", other than a lone "-") are
-- removed from the argument list as they are handled; "--" stops option
-- handling. What remains must be the input and output names (the output is
-- optional when listing). Invalid usage prints the help text and exits.
local function docmd(...)
    local arg = {...}
    local ctx = {
        strip = true, arch = jit.arch, os = string.lower(jit.os),
        type = false, modname = false, hexdump = false, string_input = false,
    }
    local list = false
    local n = 1
    while n <= #arg do
        local a = arg[n]
        local is_option = type(a) == "string" and a ~= "-"
            and string.sub(a, 1, 1) == "-"
        if not is_option then
            n = n + 1
        else
            table.remove(arg, n)
            if a == "--" then break end
            local last = #a
            for m = 2, last do
                local opt = string.sub(a, m, m)
                if opt == "l" or opt == "x" then
                    list = true
                    if opt == "x" then ctx.hexdump = true end
                elseif opt == "s" or opt == "g" then
                    ctx.strip = (opt == "s")
                else
                    -- Options taking a value must be the last letter of
                    -- their group and be followed by another argument.
                    if m ~= last or arg[n] == nil then usage() end
                    if opt == "e" then
                        -- The chunk string stays in place as the input.
                        if n ~= 1 then usage() end
                        ctx.string_input = true
                    elseif opt == "n" then
                        ctx.modname = checkmodname(table.remove(arg, n))
                    elseif opt == "t" then
                        ctx.type = checkarg(table.remove(arg, n), map_type, "file type")
                    elseif opt == "a" then
                        ctx.arch = checkarg(table.remove(arg, n), map_arch, "architecture")
                    elseif opt == "o" then
                        ctx.os = checkarg(table.remove(arg, n), map_os, "OS name")
                    else
                        usage()
                    end
                end
            end
        end
    end
    local remaining = #arg
    if list then
        if remaining == 0 or remaining > 2 then usage() end
        bclist(ctx, arg[1], arg[2] or "-")
    else
        if remaining ~= 2 then usage() end
        bcsave(ctx, arg[1], arg[2])
    end
end
697 | 
698 | return { start = docmd }
699 | 


--------------------------------------------------------------------------------
/compile.lua:
--------------------------------------------------------------------------------
 1 | local lex_setup = require('sci-lang.lexer')
 2 | local parse = require('sci-lang.parser')
 3 | local ast = require('sci-lang.lua-ast').New()
 4 | local reader = require('sci-lang.reader')
 5 | local transform = require ('sci-lang.transform')
 6 | 
-- Two kinds of backend can be used to generate code from the AST:
-- - "generator", which generates LuaJIT bytecode
-- - "luacode-generator", which generates Lua code
--
-- Both can be used interchangeably: they take the AST and produce
-- a string that can be passed to the function "loadstring".
-- In the case of the bytecode generator the string is actually a
-- binary blob that corresponds to the generated bytecode.
15 | 
16 | 
-- Turn an error raised during compilation into a result for the caller.
-- Messages starting with the "LLT-ERROR" tag are toolkit errors: they are
-- returned as `false, message` with the tag replaced by a readable prefix.
-- Anything else is an unexpected failure and is raised again unchanged.
local function lang_toolkit_error(msg)
    -- Error values are not necessarily strings; only strings can carry the tag.
    if type(msg) == "string" and string.sub(msg, 1, 9) == "LLT-ERROR" then
        return false, "luajit-lang-toolkit: " .. string.sub(msg, 10)
    end
    -- Level 0 keeps the original message intact instead of prepending the
    -- position of this re-raise.
    error(msg, 0)
end
24 | 
-- Run the full pipeline: lex, parse, transform and generate.
--   reader   : reader object handed to the lexer setup
--   filename : chunk name used by the lexer and the generator
--   options  : optional table; a truthy `code` field selects the Lua code
--              backend instead of the bytecode backend
-- Returns `true, output` on success, or the result of lang_toolkit_error
-- when parsing or generation raises an error.
local function compile(reader, filename, options)
    local want_code = options and options.code
    local backend = want_code and 'sci-lang.luacode-generator' or 'sci-lang.generator'
    local generator = require(backend)

    local ls = lex_setup(reader, filename)
    local parsed, tree = pcall(parse, ast, ls)
    if not parsed then
        return lang_toolkit_error(tree)
    end

    -- If nothing is returned, it's in-place transform.
    local ttree = transform.root(tree) or tree

    local generated, luacode = pcall(generator, ttree, filename)
    if generated then
        return true, luacode
    end
    return lang_toolkit_error(luacode)
end
45 | 
-- Compile a chunk given as a string. The chunk name defaults to "stdin".
local function lang_loadstring(src, filename, options)
    local source = reader.string(src)
    local chunkname = filename or "stdin"
    return compile(source, chunkname, options)
end
49 | 
-- Compile a chunk read through reader.file. The file name doubles as the
-- chunk name, which defaults to "stdin" when no name is given.
local function lang_loadfile(filename, options)
    local source = reader.file(filename)
    local chunkname = filename or "stdin"
    return compile(source, chunkname, options)
end
53 | 
54 | return { string = lang_loadstring, file = lang_loadfile }
55 | 


--------------------------------------------------------------------------------
/generator.lua:
--------------------------------------------------------------------------------
   1 | --
   2 | -- LuaJIT Language Toolkit.
   3 | --
   4 | -- Copyright (C) 2013-2014 Francesco Abbate. All rights reserved.
   5 | --
   6 | -- Based on the original work of Richard Hundt,
   7 | -- https://github.com/richardhundt/nyanga.
   8 | --
   9 | -- See Copyright Notice in LICENSE
  10 | --
  11 | 
  12 | local bc = require('sci-lang.bytecode')
  13 | local const_eval = require("sci-lang.ast-const-eval")
  14 | local boolean_const_eval = require("sci-lang.ast-boolean-const-eval")
  15 | 
  16 | local ID = 0
  17 | local function genid()
  18 |    ID = ID + 1
  19 |    return '__'..ID
  20 | end
  21 | 
  22 | local BC = bc.BC
  23 | 
  24 | -- comparison operators with corresponding instruction.
  25 | -- the boolean value indicate if the operands should be swapped.
  26 | local cmpop = {
  27 |     ['<' ] = { 'LT', false },
  28 |     ['>' ] = { 'LT', true  },
  29 |     ['<='] = { 'LE', false },
  30 |     ['>='] = { 'LE', true  },
  31 |     ['=='] = { 'EQ', false },
  32 |     ['~='] = { 'NE', false },
  33 | }
  34 | 
  35 | -- the same of above but for the inverse tests
  36 | local cmpopinv = {
  37 |     ['<' ] = { 'GE', false },
  38 |     ['>' ] = { 'GE', true  },
  39 |     ['<='] = { 'GT', false },
  40 |     ['>='] = { 'GT', true  },
  41 |     ['=='] = { 'NE', false },
  42 |     ['~='] = { 'EQ', false },
  43 | }
  44 | 
  45 | local function lang_error(msg, chunkname, line)
  46 |     error(string.format("LLT-ERROR%s:%d: %s", chunkname, line, msg), 0)
  47 | end
  48 | 
  49 | local MULTIRES = -1
  50 | 
  51 | -- this should be considered like binary values to perform
  52 | -- bitfield operations
  53 | local EXPR_RESULT_TRUE, EXPR_RESULT_FALSE = 1, 2
  54 | local EXPR_RESULT_BOTH = 3
  55 | 
  56 | -- Infix arithmetic instructions
  57 | local EXPR_EMIT_VN    = { value = true, number = true }
  58 | 
  59 | -- USETx, ISEQx and ISNEx instructions
  60 | local EXPR_EMIT_VSNP = { value = true, string = true, number = true, primitive = true }
  61 | 
  62 | -- TGETx/TSETx instructions
  63 | local EXPR_EMIT_VSB  = { value = true, string = true, byte = true }
  64 | 
  65 | local function store_bit(cond)
  66 |     return cond and EXPR_RESULT_TRUE or EXPR_RESULT_FALSE
  67 | end
  68 | 
  69 | -- Logical XOR (exclusive OR)
  70 | local function xor(a, b)
  71 |     return (a and not b) or (not a and b)
  72 | end
  73 | 
  74 | local StatementRule = { }
  75 | local ExpressionRule = { }
  76 | local MultiExprRule = { }
  77 | local LHSExpressionRule = { }
  78 | local TestRule = { }
  79 | 
  80 | local function is_literal(node)
  81 |     return node.kind == 'Literal'
  82 | end
  83 | 
  84 | local function is_identifier(node)
  85 |     return node.kind == 'Identifier'
  86 | end
  87 | 
  88 | local function is_local_var(ctx, node)
  89 |     if node.kind == 'Identifier' then
  90 |         local info, uval = ctx:lookup(node.name)
  91 |         if info and not uval then
  92 |             return info.idx
  93 |         end
  94 |     end
  95 | end
  96 | 
  97 | local function is_vcall(node)
  98 |     return (MultiExprRule[node.kind] ~= nil)
  99 | end
 100 | 
 101 | local function mov_toreg(ctx, dest, src)
 102 |     if dest ~= src then
 103 |         ctx:op_move(dest, src)
 104 |     end
 105 | end
 106 | 
 107 | -- Conditionally move "src" to "dest" and jump to given target
 108 | -- if "src" evaluate to true/false according to "cond".
 109 | local function cond_mov_toreg(ctx, cond, dest, src, jump_label, jreg)
 110 |     if dest ~= src then
 111 |         ctx:op_testmov(cond, dest, src, jump_label, jreg)
 112 |     else
 113 |         ctx:op_test(cond, src, jump_label, jreg)
 114 |     end
 115 | end
 116 | 
 117 | local function is_byte_number(v)
 118 |     return type(v) == 'number' and v % 1 == 0 and v >= 0 and v < 256
 119 | end
 120 | 
-- ExpressionRule's entries take a node and a destination register (dest)
-- used to store the result. At the end of the call no new registers are
-- marked as used.
-- ExpressionRule functions return nothing or a boolean value to indicate
-- whether the expression terminates with a tail call instruction.
 126 | 
 127 | function ExpressionRule:Literal(node, dest)
 128 |     self.ctx:op_load(dest, node.value)
 129 | end
 130 | 
 131 | function ExpressionRule:Identifier(node, dest)
 132 |     local name = node.name
 133 |     local var, uval = self.ctx:lookup(name)
 134 |     if var then
 135 |         if uval then
 136 |             -- Ensure variable is marked as upvalue in proto in take
 137 |             -- the upvalue index.
 138 |             local uv = self.ctx:upval(name)
 139 |             self.ctx:op_uget(dest, uv)
 140 |         else
 141 |             mov_toreg(self.ctx, dest, var.idx)
 142 |         end
 143 |     else
 144 |         self.ctx:op_gget(dest, name)
 145 |     end
 146 | end
 147 | 
 148 | function ExpressionRule:Vararg(node, dest)
 149 |     self.ctx:op_varg(dest, 1)
 150 | end
 151 | 
-- MultiExprRule's entries take a node, a number of wanted results (want)
-- and an optional boolean argument "tail" that indicates a tail call should
-- be emitted if possible.
-- The argument "want" can also be MULTIRES to indicate that the caller wants
-- as many results as the instruction returns.
-- The code will store on the stack (starting from freereg) the number of
-- wanted results.
-- Returns a first boolean value to indicate whether multiple results are
-- generated. A second boolean value indicates whether a tail call was
-- actually done.
 161 | 
 162 | function MultiExprRule:Vararg(node, want)
 163 |     self.ctx:op_varg(self.ctx.freereg, want)
 164 |     return true, false -- Multiple results, no tail call.
 165 | end
 166 | 
 167 | local function expr_isk(self, node)
 168 |     local const = const_eval(node)
 169 |     if const then
 170 |         return true, const
 171 |     elseif node.kind == "Literal" then
 172 |         local t = type(node.value)
 173 |         return (t == "string" or t == "boolean" or t == "nil"), node.value
 174 |     else
 175 |         return false
 176 |     end
 177 | end
 178 | 
 179 | local function emit_tdup(self, dest, ins)
 180 |     local kidx, t = self.ctx:new_table_template()
 181 |     ins:rewrite(BC.TDUP, dest, kidx)
 182 |     return t
 183 | end
 184 | 
 185 | local function is_kint(x)
 186 |     return x % 1 == 0 and x >= 0 and x < 2^31
 187 | end
 188 | 
 189 | function ExpressionRule:Table(node, dest)
 190 |     if #node.keyvals == 0 then
 191 |         self.ctx:op_tnew(dest, 0, 0)
 192 |         return
 193 |     end
 194 | 
 195 |     local free = self.ctx.freereg
 196 |     local ins = self.ctx:op_tnew(free, 0, 0)
 197 |     self.ctx:nextreg()
 198 |     local t
 199 |     local vtop = self.ctx.freereg
 200 |     local narray, nhash = 0, 0
 201 |     local na, nh = 0, 0
 202 |     local zeroarr = 0
 203 |     for k = 1, #node.keyvals do
 204 |         local kv = node.keyvals[k]
 205 |         local value, key = kv[1], kv[2]
 206 |         if key then
 207 |             local k_is_const, kval = expr_isk(self, key)
 208 |             local v_is_const, vval = expr_isk(self, value)
 209 |             if k_is_const and kval ~= nil and v_is_const then
 210 |                 if type(kval) == "number" and is_kint(kval) then
 211 |                     if not t then t = emit_tdup(self, free, ins) end
 212 |                     t.array[kval] = vval
 213 |                     narray = math.max(narray, kval + 1)
 214 |                     if kval == 0 then -- Zero-indexed array term.
 215 |                         zeroarr = 1
 216 |                     end
 217 |                 else
 218 |                     nhash = nhash + 1
 219 |                     if not t then t = emit_tdup(self, free, ins) end
 220 |                     -- NB: Adopt the "keyvals" style instead of hash_keys/values.
 221 |                     t.hash_keys[nhash] = kval
 222 |                     t.hash_values[nhash] = vval
 223 |                 end
 224 |             else
 225 |                 local ktag, kval = self:expr_toanyreg_tagged(key, EXPR_EMIT_VSB)
 226 |                 local v = self:expr_toanyreg(value)
 227 |                 self.ctx:op_tset(free, ktag, kval, v)
 228 |                 self.ctx.freereg = vtop
 229 |             end
 230 |             nh = nh + 1
 231 |         else
 232 |             na = na + 1
 233 |             local is_const, expr_val = expr_isk(self, value)
 234 |             if is_const then
 235 |                 if not t then t = emit_tdup(self, free, ins) end
 236 |                 t.array[na] = expr_val
 237 |                 narray = na + 1
 238 |             elseif is_vcall(value) and k == #node.keyvals then
 239 |                 self:expr_tomultireg(value, MULTIRES)
 240 |                 self.ctx:op_tsetm(free, na)
 241 |             else
 242 |                 local ktag, kval
 243 |                 if na < 256 then
 244 |                     ktag, kval = 'B', na
 245 |                 else
 246 |                     ktag, kval = 'V', self.ctx:nextreg()
 247 |                     self.ctx:op_load(kval, na)
 248 |                 end
 249 |                 local v = self:expr_toanyreg(value)
 250 |                 self.ctx:op_tset(free, ktag, kval, v)
 251 |                 self.ctx.freereg = vtop
 252 |             end
 253 |         end
 254 |     end
 255 | 
 256 |     if t then
 257 |         t.narray, t.nhash = narray, nhash
 258 |     else
 259 |         na = na + zeroarr
 260 |         nh = nh - zeroarr
 261 |         local sz = ins.tnewsize(na > 0 and na or nil, nh)
 262 |         ins:rewrite(BC.TNEW, free, sz)
 263 |     end
 264 | 
 265 |     mov_toreg(self.ctx, dest, free)
 266 | 
 267 |     self.ctx.freereg = free
 268 | end
 269 | 
 270 | -- Operations that admit instructions in the form ADDVV, ADDVN, ADDNV
 271 | local dirop = {
 272 |     ['+'] = 'ADD',
 273 |     ['*'] = 'MUL',
 274 |     ['-'] = 'SUB',
 275 |     ['/'] = 'DIV',
 276 |     ['%'] = 'MOD',
 277 | }
 278 | 
 279 | function ExpressionRule:ConcatenateExpression(node, dest)
 280 |     local free = self.ctx.freereg
 281 |     for i = 1, #node.terms do
 282 |         self:expr_tonextreg(node.terms[i])
 283 |     end
 284 |     self.ctx.freereg = free
 285 |     self.ctx:op_cat(dest, free, free + #node.terms - 1)
 286 | end
 287 | 
 288 | function ExpressionRule:BinaryExpression(node, dest, jreg)
 289 |     local free = self.ctx.freereg
 290 |     local o = node.operator
 291 |     if cmpop[o] then
 292 |         local l = genid()
 293 |         self:test_emit(node, l, jreg, false, EXPR_RESULT_BOTH, dest)
 294 |         self.ctx:here(l)
 295 |     elseif dirop[o] then
 296 |         local atag, a = self:expr_toanyreg_tagged(node.left, EXPR_EMIT_VN)
 297 |         local btag, b = self:expr_toanyreg_tagged(node.right, EXPR_EMIT_VN)
 298 |         if atag == "N" and btag == "N" then
 299 |             -- handle "nan" values here the same way LuaJIT does
 300 |             -- usually, both operands will always be 0 when both constant but
 301 |             -- re-check just to make sure, in order to trigger the assert when
 302 |             -- there's a bug in the generator
 303 |             local aval = const_eval(node.left)
 304 |             local bval = const_eval(node.right)
 305 |             if aval == 0 and bval == 0 then
 306 |                 atag, a = "V", self.ctx.freereg
 307 |                 self.ctx:op_load(self.ctx:nextreg(), 0)
 308 |             else
 309 |                 assert(false, "operands are both constants")
 310 |             end
 311 |         end
 312 |         self.ctx.freereg = free
 313 |         self.ctx:op_infix(dirop[o], dest, atag, a, btag, b)
 314 |     else
 315 |         local a = self:expr_toanyreg(node.left)
 316 |         local b = self:expr_toanyreg(node.right)
 317 |         self.ctx.freereg = free
 318 |         if o == '^' then
 319 |             self.ctx:op_pow(dest, a, b)
 320 |         else
 321 |             error("bad binary operator: "..o, 2)
 322 |         end
 323 |     end
 324 | end
 325 | 
 326 | function ExpressionRule:ExpressionValue(node, dest, jreg)
 327 |     self:expr_toreg(node.value, dest, jreg)
 328 | end
 329 | 
 330 | function ExpressionRule:UnaryExpression(node, dest)
 331 |     local free = self.ctx.freereg
 332 |     local a = self:expr_toanyreg(node.argument)
 333 |     self.ctx.freereg = free
 334 |     local o = node.operator
 335 |     if o == '-' then
 336 |         self.ctx:op_unm(dest, a)
 337 |     elseif o == '#' then
 338 |         self.ctx:op_len(dest, a)
 339 |     elseif o == 'not' then
 340 |         self.ctx:op_not(dest, a)
 341 |     else
 342 |         error("bad unary operator: "..o, 2)
 343 |     end
 344 | end
 345 | 
 346 | function ExpressionRule:LogicalExpression(node, dest, jreg)
 347 |     local negate = (node.operator == 'or')
 348 |     local lstore = store_bit(negate)
 349 |     local l = genid()
 350 |     self:test_emit(node.left, l, jreg, negate, lstore, dest)
 351 |     self:expr_toreg(node.right, dest, jreg)
 352 |     self.ctx:here(l)
 353 | end
 354 | 
 355 | function ExpressionRule:MemberExpression(node, dest)
 356 |     local free = self.ctx.freereg
 357 |     local lhs = self:lhs_expr_emit(node)
 358 |     self.ctx.freereg = free
 359 |     self.ctx:op_tget(dest, lhs.target, lhs.key_type, lhs.key)
 360 | end
 361 | 
 362 | function StatementRule:FunctionDeclaration(node)
 363 |     local path = node.id
 364 |     local lhs
 365 |     if node.locald then
 366 |         -- We avoid calling "lhs_expr_emit" on "path" because
 367 |         -- it would mark the variable as mutable.
 368 |         local vinfo = self.ctx:newvar(path.name)
 369 |         self:expr_toreg(node, vinfo.idx)
 370 |         local pc = #self.ctx.code + 1
 371 |         vinfo.startpc = pc
 372 |         vinfo.endpc = pc
 373 |     else
 374 |         lhs = self:lhs_expr_emit(path)
 375 |         self:expr_tolhs(lhs, node)
 376 |     end
 377 | end
 378 | 
 379 | function ExpressionRule:FunctionExpression(node, dest)
 380 |     local free = self.ctx.freereg
 381 |     local child = self.ctx:child(node.firstline, node.lastline)
 382 |     self.ctx = child
 383 |     for i=1, #node.params do
 384 |         if node.params[i].kind == 'Vararg' then
 385 |             self.ctx.flags = bit.bor(self.ctx.flags, bc.Proto.VARARG)
 386 |         else
 387 |             self.ctx:param(node.params[i].name)
 388 |         end
 389 |     end
 390 |     self:block_emit(node.body)
 391 |     self:close_proto(node.lastline)
 392 | 
 393 |     self.ctx = self.ctx:parent()
 394 |     self.ctx.freereg = free
 395 |     self.ctx:line(node.lastline)
 396 |     self.ctx:op_fnew(dest, child.idx)
 397 | end
 398 | 
 399 | ExpressionRule.FunctionDeclaration = ExpressionRule.FunctionExpression
 400 | 
 401 | local function emit_call_expression(self, node, want, use_tail, use_self)
 402 |     local free = self.ctx.freereg
 403 | 
 404 |     if use_self then
 405 |         local obj = self:expr_toanyreg(node.receiver)
 406 |         self.ctx:op_move(free + 1, obj)
 407 |         self.ctx:setreg(free + 2)
 408 |         local method_type, method = self:property_tagged(node.method.name)
 409 |         self.ctx:op_tget(free, obj, method_type, method)
 410 |         self.ctx.freereg = free + 2
 411 |     else
 412 |         self:expr_tonextreg(node.callee)
 413 |     end
 414 | 
 415 |     local narg = #node.arguments
 416 |     for i=1, narg - 1 do
 417 |         self:expr_tonextreg(node.arguments[i])
 418 |     end
 419 |     local mres = false
 420 |     if narg > 0 then
 421 |         local lastarg = node.arguments[narg]
 422 |         mres = self:expr_tomultireg(lastarg, MULTIRES)
 423 |         self.ctx:nextreg()
 424 |     end
 425 | 
 426 |     if use_self then narg = narg + 1 end
 427 |     self.ctx.freereg = free
 428 |     if mres then
 429 |         if use_tail then
 430 |             self.ctx:close_uvals()
 431 |             self.ctx:op_callmt(free, narg - 1)
 432 |         else
 433 |             self.ctx:op_callm(free, want, narg - 1)
 434 |         end
 435 |     else
 436 |         if use_tail then
 437 |             self.ctx:close_uvals()
 438 |             self.ctx:op_callt(free, narg)
 439 |         else
 440 |             self.ctx:op_call(free, want, narg)
 441 |         end
 442 |     end
 443 | 
 444 |     return want == MULTIRES, use_tail
 445 | end
 446 | 
 447 | function MultiExprRule:CallExpression(node, want, tail)
 448 |     return emit_call_expression(self, node, want, tail, false)
 449 | end
 450 | 
 451 | function MultiExprRule:SendExpression(node, want, tail)
 452 |     return emit_call_expression(self, node, want, tail, true)
 453 | end
 454 | 
 455 | function LHSExpressionRule:Identifier(node)
 456 |     local info, uval = self.ctx:lookup(node.name)
 457 |     if uval then
 458 |         -- Ensure variable is marked as upvalue in proto and take
 459 |         -- upvalue index.
 460 |         info.mutable = true
 461 |         local uv = self.ctx:upval(node.name)
 462 |         return {tag = 'upval', uv = uv}
 463 |     elseif info then
 464 |         info.mutable = true
 465 |         return {tag = 'local', target = info.idx}
 466 |     else
 467 |         return {tag = 'global', name = node.name}
 468 |     end
 469 | end
 470 | 
 471 | function LHSExpressionRule:MemberExpression(node)
 472 |     local target = self:expr_toanyreg(node.object)
 473 |     local key_type, key
 474 |     if node.computed then
 475 |         key_type, key = self:expr_toanyreg_tagged(node.property, EXPR_EMIT_VSB)
 476 |     else
 477 |         key_type, key = self:property_tagged(node.property.name)
 478 |     end
 479 |     return { tag = 'member', target = target, key = key, key_type = key_type }
 480 | end
 481 | 
 482 | function TestRule:Literal(node, jmp, jreg, negate, store, dest)
 483 |     local value = node.value
 484 |     if bit.band(store, store_bit(value)) ~= 0 then
 485 |         self:expr_toreg(node, dest)
 486 |     else
 487 |         jreg = self.ctx.freereg
 488 |     end
 489 |     if (negate and value) or (not negate and not value) then
 490 |         self.ctx:jump(jmp, jreg)
 491 |     end
 492 | end
 493 | 
 494 | local function compare_op(negate, op)
 495 |     local oper_table = negate and cmpop or cmpopinv
 496 |     local e = oper_table[op]
 497 |     return e[1], e[2]
 498 | end
 499 | 
 500 | -- Return true IFF the variable "store" has the EXPR_RESULT_FALSE bit
 501 | -- set. If "negate" is true check the EXPR_RESULT_TRUE bit instead.
 502 | local function has_branch(store, negate)
 503 |     return bit.band(store, store_bit(negate)) ~= 0
 504 | end
 505 | 
-- Test rule for binary expressions.
-- Comparison operators (those listed in "cmpop") are emitted as a compare
-- instruction through op_comp; any other operator falls back to the generic
-- expr_test. "jmp"/"jreg" are the jump label and register base, "negate"
-- selects which outcome jumps, "store" is the bitfield telling which
-- boolean results must be written to "dest".
function TestRule:BinaryExpression(node, jmp, jreg, negate, store, dest)
    local o = node.operator
    if cmpop[o] then
        local free = self.ctx.freereg
        local atag, a, btag, b
        if o == '==' or o == '~=' then
            -- Equality: operands are requested in tagged form. The first
            -- operand handed to op_comp is always a register ('V'), so if
            -- the left side came back with another tag the operands are
            -- swapped and the right side is evaluated into a register.
            atag, a = self:expr_toanyreg_tagged(node.left, EXPR_EMIT_VSNP)
            if atag == 'V' then
                btag, b = self:expr_toanyreg_tagged(node.right, EXPR_EMIT_VSNP)
            else
                btag, b = atag, a
                atag, a = 'V', self:expr_toanyreg(node.right)
            end
        else
            -- Ordering comparison: both operands in registers; "btag" is
            -- left unset (nil) in this case.
            a = self:expr_toanyreg(node.left)
            b = self:expr_toanyreg(node.right)
        end
        -- Release the registers used for the operands.
        self.ctx.freereg = free
        local use_imbranch = has_branch(store, negate)
        if use_imbranch then
            -- The jumping outcome must store a value: compare with the
            -- opposite sense towards a local label, then load the boolean
            -- "negate" into "dest" and jump to "jmp".
            local test, swap = compare_op(not negate, o)
            local altlabel = genid()
            self.ctx:op_comp(test, a, btag, b, altlabel, free, swap)
            self.ctx:op_load(dest, negate)
            self.ctx:jump(jmp, jreg)
            self.ctx:here(altlabel)
            self.ctx.freereg = free
        else
            -- Nothing to store on the jumping outcome: jump straight to "jmp".
            local test, swap = compare_op(negate, o)
            self.ctx:op_comp(test, a, btag, b, jmp, free, swap)
        end
        if has_branch(store, not negate) then
            -- Fall-through outcome: store the boolean "not negate" if asked.
            self.ctx:op_load(dest, not negate)
        end
    else
        self:expr_test(node, jmp, jreg, negate, store, dest)
    end
end
 544 | 
 545 | function TestRule:UnaryExpression(node, jmp, jreg, negate, store, dest)
 546 |     if node.operator == 'not' and store == 0 then
 547 |         self:test_emit(node.argument, jmp, jreg, not negate)
 548 |     else
 549 |         self:expr_test(node, jmp, jreg, negate, store, dest or self.ctx.freereg)
 550 |     end
 551 | end
 552 | 
 553 | function TestRule:LogicalExpression(node, jmp, jreg, negate, store, dest)
 554 |     local or_operator = (node.operator == "or")
 555 |     local lstore = bit.band(store, store_bit(or_operator))
 556 |     local imbranch = xor(negate, or_operator)
 557 |     if imbranch then
 558 |         local templ = genid()
 559 |         self:test_emit(node.left, templ, jreg, not negate, lstore, dest)
 560 |         self:test_emit(node.right, jmp, jreg, negate, store, dest)
 561 |         self.ctx:here(templ)
 562 |     else
 563 |         self:test_emit(node.left, jmp, jreg, negate, lstore, dest)
 564 |         self:test_emit(node.right, jmp, jreg, negate, store, dest)
 565 |     end
 566 | end
 567 | 
 568 | function StatementRule:CallExpression(node)
 569 |     self:expr_tomultireg(node, 0, false)
 570 | end
 571 | 
 572 | function StatementRule:SendExpression(node)
 573 |     self:expr_tomultireg(node, 0, false)
 574 | end
 575 | 
 576 | function StatementRule:LabelStatement(node)
 577 |     local ok, label = self.ctx:goto_label(node.label)
 578 |     if not ok then
 579 |         lang_error(label, self.chunkname, node.line)
 580 |     end
 581 | end
 582 | 
 583 | function StatementRule:GotoStatement(node)
 584 |     self.ctx:goto_jump(node.label, node.line)
 585 | end
 586 | 
 587 | function StatementRule:DoStatement(node)
 588 |     self:block_enter()
 589 |     self:block_emit(node.body)
 590 |     self:block_leave(node.body.lastline)
 591 | end
 592 | 
 593 | function StatementRule:IfStatement(node, root_exit)
 594 |     local free = self.ctx.freereg
 595 |     local ncons = #node.tests
 596 |     -- Count the number of branches, including the "else" branch.
 597 |     local count = node.alternate and ncons + 1 or ncons
 598 |     local local_exit = count > 1 and genid()
 599 |     -- Set the exit point to the extern exit if given or set to local
 600 |     -- exit (potentially false).
 601 |     local exit = root_exit or local_exit
 602 | 
 603 |     for i = 1, ncons do
 604 |         local test, block = node.tests[i], node.cons[i]
 605 |         local next_test = genid()
 606 |         -- Set the exit point to jump on at the end of for this block.
 607 |         -- If this is the last branch (count == 1) set to false.
 608 |         local bexit = count > 1 and exit
 609 | 
 610 |         self:test_emit(test, next_test, free)
 611 | 
 612 |         self:block_enter()
 613 |         self:block_emit(block, bexit)
 614 |         self:block_leave(block.lastline, bexit)
 615 | 
 616 |         self.ctx:here(next_test)
 617 |         count = count - 1
 618 |     end
 619 | 
 620 |     if node.alternate then
 621 |         self:block_enter()
 622 |         self:block_emit(node.alternate)
 623 |         self:block_leave(node.alternate.lastline)
 624 |     end
 625 |     if exit and exit == local_exit then
 626 |         self.ctx:here(exit)
 627 |     end
 628 |     self.ctx.freereg = free
 629 | end
 630 | function StatementRule:ExpressionStatement(node)
 631 |     return self:emit(node.expression)
 632 | end
 633 | function StatementRule:LocalDeclaration(node)
 634 |     local nvars = #node.names
 635 |     local nexps = #node.expressions
 636 |     local base = self.ctx.freereg
 637 |     local slots = nvars
 638 |     for i = 1, nexps - 1 do
 639 |         if slots == 0 then break end
 640 |         self:expr_tonextreg(node.expressions[i])
 641 |         slots = slots - 1
 642 |     end
 643 | 
 644 |     if slots > 0 then
 645 |         if nexps > 0 then
 646 |             self:expr_tomultireg(node.expressions[nexps], slots)
 647 |         else
 648 |             self.ctx:op_nils(base, slots)
 649 |         end
 650 |         self.ctx:nextreg(slots)
 651 |     end
 652 | 
 653 |     for i=1, nvars do
 654 |         local lhs = node.names[i]
 655 |         self.ctx:newvar(lhs.name, base + (i - 1))
 656 |     end
 657 | end
 658 | 
 659 | -- Eliminate write-after-read hazards for local variable assignment.
 660 | -- Implement the same approach found in lj_parse.c from luajit.
 661 | -- Check left-hand side against variable register "reg".
 662 | local function assign_hazard(self, lhs, reg)
 663 |     local tmp = self.ctx.freereg -- Rename to this temp. register (if needed).
 664 |     local hazard = false
 665 |     for i =  #lhs, 1, -1 do
 666 |         if lhs[i].tag == 'member' then
 667 |             if lhs[i].target == reg then -- t[i], t = 1, 2
 668 |                 hazard = true
 669 |                 lhs[i].target = tmp
 670 |             end
 671 |             if lhs[i].key_type == 'V' and
 672 |                lhs[i].key == reg then -- t[i], i = 1, 2
 673 |                 hazard = true
 674 |                 lhs[i].key = tmp
 675 |             end
 676 |         end
 677 |     end
 678 |     if hazard then
 679 |         self.ctx:nextreg()
 680 |         self.ctx:op_move(tmp, reg)
 681 |     end
 682 | end
 683 | 
 684 | function StatementRule:AssignmentExpression(node)
 685 |     local free = self.ctx.freereg
 686 |     local nvars = #node.left
 687 |     local nexps = #node.right
 688 | 
 689 |     local lhs = { }
 690 |     for i = 1, nvars do
 691 |         local va = self:lhs_expr_emit(node.left[i])
 692 |         if va.tag == 'local' then
 693 |             assign_hazard(self, lhs, va.target)
 694 |         end
 695 |         lhs[i] = va
 696 |     end
 697 | 
 698 |     local slots = nvars
 699 |     local exprs = { }
 700 |     for i=1, nexps - 1 do
 701 |         if slots == 0 then break end
 702 |         -- LuaJIT compatibility:
 703 |         -- Use a temporary register even the LHS is not an immediate local
 704 |         -- variable.
 705 |         local use_reg = true
 706 |         -- local use_reg = is_local_var(self.ctx, node.left[i])
 707 |         if use_reg then
 708 |             exprs[i] = self:expr_tonextreg(node.right[i])
 709 |         else
 710 |             exprs[i] = self:expr_toanyreg(node.right[i])
 711 |         end
 712 |         slots = slots - 1
 713 |     end
 714 | 
 715 |     local i = nexps
 716 |     if slots == 1 then
 717 |         -- Case where (nb of expression) >= (nb of variables).
 718 |         self:expr_tolhs(lhs[i], node.right[i])
 719 |     else
 720 |         -- Case where (nb of expression) < (nb of variables). In this case
 721 |         -- we cosider that the last expression can generate multiple values.
 722 |         local exp_base = self.ctx.freereg
 723 |         self:expr_tomultireg(node.right[i], slots)
 724 |         for k = slots - 1, 0, -1 do
 725 |             self:assign(lhs[i + k], exp_base + k)
 726 |         end
 727 |     end
 728 | 
 729 |     for i = nvars - slots, 1, -1 do
 730 |         self:assign(lhs[i], exprs[i])
 731 |     end
 732 | 
 733 |     self.ctx.freereg = free
 734 | end
 735 | function StatementRule:WhileStatement(node)
 736 |     local free = self.ctx.freereg
 737 |     local loop, exit = genid(), genid()
 738 |     self:loop_enter(exit, free)
 739 |     self.ctx:here(loop)
 740 |     self:test_emit(node.test, exit, free)
 741 |     self.ctx:loop(exit)
 742 |     self:block_emit(node.body)
 743 |     self.ctx:jump(loop, free)
 744 |     self.ctx:here(exit)
 745 |     self:loop_leave(node.lastline)
 746 |     self.ctx.freereg = free
 747 | end
 748 | function StatementRule:RepeatStatement(node)
 749 |     local free = self.ctx.freereg
 750 |     local loop, exit = genid(), genid()
 751 |     self:loop_enter(exit, free)
 752 |     self.ctx:here(loop)
 753 |     self.ctx:loop(exit)
 754 |     self:block_emit(node.body)
 755 |     self:test_emit(node.test, loop, free)
 756 |     self.ctx:here(exit)
 757 |     self:loop_leave(node.lastline)
 758 |     self.ctx.freereg = free
 759 | end
 760 | function StatementRule:BreakStatement()
 761 |     local base, exit, need_uclo = self.ctx:current_loop()
 762 |     self.ctx:scope_jump(exit, base, need_uclo)
 763 |     self.ctx.scope.need_uclo = false
 764 | end
 765 | function StatementRule:ForStatement(node)
 766 |     local free = self.ctx.freereg
 767 |     local exit = genid()
 768 |     local init = node.init
 769 |     local name = init.id.name
 770 |     local line = node.line
 771 | 
 772 |     self:expr_tonextreg(init.value)
 773 |     self:expr_tonextreg(node.last)
 774 |     if node.step then
 775 |         self:expr_tonextreg(node.step)
 776 |     else
 777 |         self.ctx:op_load(self.ctx.freereg, 1)
 778 |         self.ctx:nextreg()
 779 |     end
 780 |     local forivinfo = self.ctx:forivars(0x01)
 781 |     local loop = self.ctx:op_fori(free)
 782 |     self:loop_enter(exit, free)
 783 |     self.ctx:newvar(name)
 784 |     self:block_enter()
 785 |     self:block_emit(node.body)
 786 |     self:block_leave()
 787 |     self:loop_leave(node.body.lastline)
 788 |     self.ctx:op_forl(free, loop)
 789 |     self.ctx:setpcline(line)
 790 |     forivinfo.endpc = #self.ctx.code
 791 |     self.ctx:here(exit)
 792 |     self.ctx.freereg = free
 793 | end
 794 | function StatementRule:ForInStatement(node)
 795 |     local free = self.ctx.freereg
 796 |     local iter = free + 3
 797 |     local line = node.line
 798 | 
 799 |     local loop, exit = genid(), genid()
 800 | 
 801 |     local vars = node.namelist.names
 802 |     local iter_list = node.explist
 803 | 
 804 |     local iter_count = 0
 805 |     for i = 1, #iter_list - 1 do
 806 |         self:expr_tonextreg(iter_list[i])
 807 |         iter_count = iter_count + 1
 808 |         if iter_count == 2 then break end
 809 |     end
 810 | 
 811 |     self:expr_tomultireg(iter_list[iter_count+1], 3 - iter_count) -- func, state, ctl
 812 |     self.ctx:setreg(iter)
 813 |     local forivinfo = self.ctx:forivars(0x04)
 814 |     self.ctx:jump(loop, self.ctx.freereg)
 815 | 
 816 |     self:loop_enter(exit, free)
 817 | 
 818 |     for i=1, #vars do
 819 |         local name = vars[i].name
 820 |         self.ctx:newvar(name, iter + i - 1)
 821 |         self.ctx:setreg(iter + i)
 822 |     end
 823 | 
 824 |     local ltop = self.ctx:here(genid())
 825 |     self:block_emit(node.body)
 826 |     self:loop_leave(node.lastline)
 827 |     self.ctx:here(loop)
 828 |     self.ctx:op_iterc(iter, #vars)
 829 |     self.ctx:setpcline(line)
 830 |     self.ctx:op_iterl(iter, ltop)
 831 |     self.ctx:setpcline(line)
 832 |     forivinfo.endpc = #self.ctx.code
 833 |     self.ctx:here(exit)
 834 |     self.ctx.freereg = free
 835 | end
 836 | 
 837 | function StatementRule:ReturnStatement(node)
 838 |     local narg = #node.arguments
 839 |     local local_var = narg == 1 and is_local_var(self.ctx, node.arguments[1])
 840 |     if narg == 0 then
 841 |         self.ctx:close_uvals()
 842 |         self.ctx:op_ret0()
 843 |     elseif local_var then
 844 |         self.ctx:close_uvals()
 845 |         self.ctx:op_ret1(local_var)
 846 |     else
 847 |         local base = self.ctx.freereg
 848 |         for i=1, narg - 1 do
 849 |             self:expr_tonextreg(node.arguments[i])
 850 |         end
 851 |         local lastarg = node.arguments[narg]
 852 |         local request_tcall = (narg == 1)
 853 |         local mret, tail = self:expr_tomultireg(lastarg, MULTIRES, request_tcall)
 854 |         self.ctx.freereg = base
 855 |         if not tail then
 856 |             self.ctx:close_uvals()
 857 |             if mret then
 858 |                 self.ctx:op_retm(base, narg - 1)
 859 |             elseif narg == 1 then
 860 |                 self.ctx:op_ret1(base)
 861 |             else
 862 |                 self.ctx:op_ret(base, narg)
 863 |             end
 864 |         end
 865 |     end
 866 |     if self.ctx:is_root_scope() then
 867 |         self.ctx.explret = true
 868 |     end
 869 | end
 870 | 
 871 | function StatementRule:Chunk(node, name)
 872 |     self:block_emit(node.body)
 873 |     self:close_proto()
 874 | end
 875 | 
 876 | local function generate(tree, name)
 877 |     local self = { line = 0 }
 878 |     self.main = bc.Proto.new(bc.Proto.VARARG, tree.firstline, tree.lastline)
 879 |     self.ctx = self.main
 880 |     self.chunkname = tree.chunkname
 881 | 
 882 |     function self:block_enter()
 883 |         self.ctx:enter()
 884 |     end
 885 | 
 886 |     function self:block_leave(lastline, exit)
 887 |         self.ctx:fscope_end()
 888 |         self.ctx:close_block(self.ctx.scope.basereg, exit)
 889 |         self.ctx:leave()
 890 |         if lastline then self.ctx:line(lastline) end
 891 |     end
 892 | 
 893 |     function self:loop_enter(exit, exit_reg)
 894 |         self:block_enter()
 895 |         self.ctx:loop_register(exit, exit_reg)
 896 |     end
 897 | 
 898 |     function self:loop_leave(lastline)
 899 |         self:block_leave(lastline)
 900 |     end
 901 | 
 902 |     function self:assign(lhs, expr)
 903 |         local saveline = self.ctx.currline
 904 |         self.ctx:line(lhs.line)
 905 |         if lhs.tag == 'member' then
 906 |             -- SET instructions with a Primitive "P" index are not accepted.
 907 |             -- The method self:lhs_expr_emit does never generate such requests.
 908 |             assert(lhs.key_type ~= 'P', "invalid assignment instruction")
 909 |             self.ctx:op_tset(lhs.target, lhs.key_type, lhs.key, expr)
 910 |         elseif lhs.tag == 'upval' then
 911 |             self.ctx:op_uset(lhs.uv, 'V', expr)
 912 |         elseif lhs.tag == 'local' then
 913 |             mov_toreg(self.ctx, lhs.target, expr)
 914 |         else
 915 |             self.ctx:op_gset(expr, lhs.name)
 916 |         end
 917 |         self.ctx:line(saveline)
 918 |     end
 919 | 
 920 |     function self:emit(node, ...)
 921 |         if node.line then self.ctx:line(node.line) end
 922 |         local rule = StatementRule[node.kind]
 923 |         if not rule then error("cannot find a statement rule for " .. node.kind) end
 924 |         rule(self, node, ...)
 925 |     end
 926 | 
 927 |     function self:block_emit(stmts, if_exit)
 928 |         local n = #stmts
 929 |         for i = 1, n - 1 do
 930 |             self:emit(stmts[i])
 931 |         end
 932 |         if n > 0 then
 933 |             self:emit(stmts[n], if_exit)
 934 |         end
 935 |     end
 936 | 
 937 |     -- Emit the code to evaluate "node" and perform a conditional
 938 |     -- jump based on its value.
 939 |     -- The arguments "jmp" and "jreg" are respectively the jump location
 940 |     -- and the rbase operand for the JMP operation if the store is performed.
 941 |     -- When no store is done JMP will use "freereg" as rbase operand.
 942 |     -- If "negate" is false the jump on FALSE and viceversa.
 943 |     -- The argument "store" is a bitfield that specifies which
 944 |     -- computed epxression should be stored. The bit EXPR_RESULT_TRUE
 945 |     -- means that the value should be stored when its value is "true".
 946 |     -- If "store" is not ZERO than dest should be the register
 947 |     -- destination for the result.
 948 |     function self:test_emit(node, jmp, jreg, negate, store, dest)
 949 |         if node.line then self.ctx:line(node.line) end
 950 |         local rule = TestRule[node.kind]
 951 |         store = store or 0
 952 |         if rule then
 953 |             rule(self, node, jmp, jreg, negate, store, dest)
 954 |         else
 955 |             self:expr_test(node, jmp, jreg, negate, store, dest)
 956 |         end
 957 |     end
 958 | 
 959 |     -- Emit code to test an expression as a boolean value
 960 |     function self:expr_test(node, jmp, jreg, negate, store, dest)
 961 |         local free = self.ctx.freereg
 962 |         local const_val = boolean_const_eval(node)
 963 |         if const_val ~= nil then
 964 |             if bit.band(store, store_bit(const_val)) ~= 0 then
 965 |                 self.ctx:op_load(dest, const_val)
 966 |             end
 967 |             if xor(negate, not const_val) then
 968 |                 self.ctx:jump(jmp, jreg)
 969 |             end
 970 |         else
 971 |             local expr = self:expr_toanyreg(node)
 972 |             if store ~= 0 then
 973 |                 cond_mov_toreg(self.ctx, negate, dest, expr, jmp, self.ctx.freereg)
 974 |             else
 975 |                 self.ctx:op_test(negate, expr, jmp, self.ctx.freereg)
 976 |             end
 977 |         end
 978 |         self.ctx.freereg = free
 979 |     end
 980 | 
 981 |     -- Emit code to compute the "node" expression in any register. Return
 982 |     -- the register itself and an optional boolean value to indicate if a
 983 |     -- tail call was used.
 984 |     -- If a new register is needed to store the results one is automatically
 985 |     -- allocated and marked as used.
 986 |     function self:expr_toanyreg(node, tail)
 987 |         local localvar = is_local_var(self.ctx, node)
 988 |         if localvar then
 989 |             return localvar, false
 990 |         else
 991 |             local dest = self.ctx.freereg
 992 |             local tailcall = self:expr_toreg(node, dest, dest + 1, tail)
 993 |             return self.ctx:nextreg(), tailcall
 994 |         end
 995 |     end
 996 | 
 997 |     -- Emit code to compute the "node" expression by storing the result in
 998 |     -- the given register "dest". The argument "jreg" indicate the next free
 999 |     -- register to jump in for "test_emit" call (logical expressions).
1000 |     -- The function does return an optional boolean value to indicate if
1001 |     -- a tail call was actually used.
1002 |     -- This function always leave the freereg counter to its initial value.
1003 |     function self:expr_toreg(node, dest, jreg, tail)
1004 |         if node.line then self.ctx:line(node.line) end
1005 |         local const_val = const_eval(node)
1006 |         if const_val then
1007 |             self.ctx:op_load(dest, const_val)
1008 |         else
1009 |             local rule = ExpressionRule[node.kind]
1010 |             if rule then
1011 |                 rule(self, node, dest, jreg or self.ctx.freereg)
1012 |             elseif MultiExprRule[node.kind] then
1013 |                 rule = MultiExprRule[node.kind]
1014 |                 local base = self.ctx.freereg
1015 |                 local mres, tailcall = rule(self, node, 1, base == dest and tail)
1016 |                 mov_toreg(self.ctx, dest, base)
1017 |                 return tailcall
1018 |             else
1019 |                 error("Cannot find an ExpressionRule for " .. node.kind)
1020 |             end
1021 |         end
1022 |         return false -- no tail call
1023 |     end
1024 | 
1025 |     -- Emit code to compute the "node" expression in the next available register
1026 |     -- and increment afterward the free register counter.
1027 |     -- It does call "expr_toreg" with (dest + 1) as "jreg" argument to inform
1028 |     -- an eventual "test_emit" call that the next free register after the expression
1029 |     -- store is (dest + 1).
1030 |     function self:expr_tonextreg(node)
1031 |         local dest = self.ctx.freereg
1032 |         self:expr_toreg(node, dest, dest + 1)
1033 |         self.ctx:setreg(dest + 1)
1034 |         return dest
1035 |     end
1036 | 
1037 |     -- Generate the code to store multiple values in consecutive registers
1038 |     -- starting from the current "freereg". The argument "want" indicate
1039 |     -- how many values should be generated or MULTIRES.
1040 |     -- The optional boolean parameter "tail" indicate if a tail call instruction
1041 |     -- should be generated if possible.
1042 |     -- Return two boolean values. The first indicate if it does return multi
1043 |     -- results. The second if a tail call was actually generated.
1044 |     function self:expr_tomultireg(node, want, tail)
1045 |         if node.line then self.ctx:line(node.line) end
1046 |         local rule = MultiExprRule[node.kind]
1047 |         if rule then
1048 |             return rule(self, node, want, tail)
1049 |         elseif (want > 0 or want == MULTIRES) then
1050 |             local dest = self.ctx.freereg
1051 |             self:expr_toreg(node, dest, dest + 1)
1052 |             self.ctx:maxframe(dest + 1)
1053 |             if want > 1 then
1054 |                 self.ctx:op_nils(dest + 1, want - 1)
1055 |                 self.ctx:maxframe(dest + want)
1056 |             end
1057 |             return false, false
1058 |         end
1059 |     end
1060 | 
1061 |     -- Like "expr_toreg" but it can return an expression (register) or
1062 |     -- an immediate constant. It does return a tag and then the value
1063 |     -- itself.
1064 |     function self:expr_toanyreg_tagged(node, emit)
1065 |         local const_val = const_eval(node)
1066 |         if emit.byte and const_val and is_byte_number(const_val) then
1067 |             return 'B', const_val
1068 |         elseif emit.number and const_val then
1069 |             return 'N', self.ctx:const(const_val)
1070 |         end
1071 |         if node.kind == 'Literal' then
1072 |             local value = node.value
1073 |             local tv = type(value)
1074 |             if emit.primitive and (tv == 'nil' or tv == 'boolean') then
1075 |                 return 'P', self.ctx:kpri(value)
1076 |             elseif emit.string and tv == 'string' then
1077 |                 return self:property_tagged(value)
1078 |             end
1079 |             -- fall through
1080 |         end
1081 |         return 'V', self:expr_toanyreg(node)
1082 |     end
1083 | 
1084 | 
1085 |     function self:property_tagged(property_name)
1086 |         local kprop = self.ctx:const(property_name)
1087 |         if kprop < 255 then
1088 |             return 'S', kprop
1089 |         else
1090 |             local prop = self.ctx:nextreg()
1091 |             self.ctx:op_load(prop, property_name)
1092 |             return 'V', prop
1093 |         end
1094 |     end
1095 | 
1096 |     -- Emit code to store an expression in the given LHS.
1097 |     function self:expr_tolhs(lhs, expr)
1098 |         local free = self.ctx.freereg
1099 |         if lhs.tag == 'upval' then
1100 |             local tag, expr = self:expr_toanyreg_tagged(expr, EXPR_EMIT_VSNP)
1101 |             self.ctx:op_uset(lhs.uv, tag, expr)
1102 |             self.ctx:setpcline(lhs.line)
1103 |         elseif lhs.tag == 'local' then
1104 |             self:expr_toreg(expr, lhs.target)
1105 |         else
1106 |             local reg = self:expr_toanyreg(expr)
1107 |             self:assign(lhs, reg)
1108 |         end
1109 |         self.ctx.freereg = free
1110 |     end
1111 | 
1112 |     function self:lhs_expr_emit(node)
1113 |         local line = self.ctx.currline
1114 |         local rule = assert(LHSExpressionRule[node.kind], "undefined assignment rule for node type: \"" .. node.kind .. "\"")
1115 |         local lhs = rule(self, node)
1116 |         lhs.line = line
1117 |         return lhs
1118 |     end
1119 | 
1120 |     function self:close_proto(lastline)
1121 |         if lastline then self.ctx:line(lastline) end
1122 |         local err, line = self.ctx:close_proto()
1123 |         if err then
1124 |             lang_error(err, self.chunkname, line)
1125 |         end
1126 |     end
1127 | 
1128 |     self:emit(tree)
1129 | 
1130 |     local dump = bc.Dump.new(self.main, name)
1131 |     return dump:pack()
1132 | end
1133 | 
1134 | return generate
1135 | 


--------------------------------------------------------------------------------
/lexer.lua:
--------------------------------------------------------------------------------
  1 | local ffi = require('ffi')
  2 | 
  3 | local band = bit.band
  4 | local strsub, strbyte, strchar = string.sub, string.byte, string.char
  5 | 
-- Byte values of the ASCII characters '0', '9', 'a', 'f', 'z', 'A' and 'Z',
-- used for character classification.
local ASCII_0, ASCII_9 = 48, 57
local ASCII_a, ASCII_f, ASCII_z = 97, 102, 122
local ASCII_A, ASCII_Z = 65, 90

-- Sentinel stored as the "current character" once the reader is exhausted.
-- It is a number, while every real character is a one-character string.
local END_OF_STREAM = -1

-- Set of reserved words; the lexer only tests membership.
local ReservedKeyword = {['and'] = 1, ['break'] = 2, ['do'] = 3, ['else'] = 4, ['elseif'] = 5, ['end'] = 6, ['false'] = 7, ['for'] = 8, ['function'] = 9, ['goto'] = 10, ['if'] = 11, ['in'] = 12, ['local'] = 13, ['nil'] = 14, ['not'] = 15, ['or'] = 16, ['repeat'] = 17, ['return'] = 18, ['then'] = 19, ['true'] = 20, ['until'] = 21, ['while'] = 22 }

-- FFI constructors used to build 64 bit integer and imaginary literals.
local uint64, int64 = ffi.typeof('uint64_t'), ffi.typeof('int64_t')
local complex = ffi.typeof('complex')

-- Printable form of the tokens whose name differs from their source text.
local TokenSymbol = { TK_ge = '>=', TK_le = '<=' , TK_concat = '..', TK_eq = '==', TK_ne = '~=', TK_eof = '<eof>' }
 18 | 
 19 | local function token2str(tok)
 20 |     if string.match(tok, "^TK_") then
 21 |         return TokenSymbol[tok] or string.sub(tok, 4)
 22 |     else
 23 |         return tok
 24 |     end
 25 | end
 26 | 
 27 | local function error_lex(chunkname, tok, line, em, ...)
 28 |     local emfmt = string.format(em, ...)
 29 |     local msg = string.format("%s:%d: %s", chunkname, line, emfmt)
 30 |     if tok then
 31 |         msg = string.format("%s near '%s'", msg, tok)
 32 |     end
 33 |     error("LLT-ERROR" .. msg, 0)
 34 | end
 35 | 
 36 | local function lex_error(ls, token, em, ...)
 37 |     local tok
 38 |     if token == 'TK_name' or token == 'TK_string' or token == 'TK_number' then
 39 |         tok = ls.save_buf
 40 |     elseif token then
 41 |         tok = token2str(token)
 42 |     end
 43 |     error_lex(ls.chunkname, tok, ls.linenumber, em, ...)
 44 | end
 45 | 
 46 | local function char_isident(c)
 47 |     if type(c) == 'string' then
 48 |         local b = strbyte(c)
 49 |         if b >= ASCII_0 and b <= ASCII_9 then
 50 |             return true
 51 |         elseif b >= ASCII_a and b <= ASCII_z then
 52 |             return true
 53 |         elseif b >= ASCII_A and b <= ASCII_Z then
 54 |             return true
 55 |         else
 56 |             return (c == '_')
 57 |         end
 58 |     end
 59 |     return false
 60 | end
 61 | 
 62 | local function char_isdigit(c)
 63 |     if type(c) == 'string' then
 64 |         local b = strbyte(c)
 65 |         return b >= ASCII_0 and b <= ASCII_9
 66 |     end
 67 |     return false
 68 | end
 69 | 
 70 | local function char_isspace(c)
 71 |     local b = strbyte(c)
 72 |     return b >= 9 and b <= 13 or b == 32
 73 | end
 74 | 
 75 | local function byte(ls, n)
 76 |     local k = ls.p + n
 77 |     return strsub(ls.data, k, k)
 78 | end
 79 | 
 80 | local function skip(ls, n)
 81 |     ls.n = ls.n - n
 82 |     ls.p = ls.p + n
 83 | end
 84 | 
 85 | local function pop(ls)
 86 |     local k = ls.p
 87 |     local c = strsub(ls.data, k, k)
 88 |     ls.p = k + 1
 89 |     ls.n = ls.n - 1
 90 |     return c
 91 | end
 92 | 
 93 | local function fillbuf(ls)
 94 |     local data = ls:read_func()
 95 |     if not data then
 96 |         return END_OF_STREAM
 97 |     end
 98 |     ls.data, ls.n, ls.p = data, #data, 1
 99 |     return pop(ls)
100 | end
101 | 
-- Advance to the next character, refilling the buffer when it is empty.
-- The character is stored in "ls.current" and also returned.
local function nextchar(ls)
    local c
    if ls.n > 0 then
        c = pop(ls)
    else
        c = fillbuf(ls)
    end
    ls.current = c
    return c
end
107 | 
-- Tell if the current character is a line feed or a carriage return.
local function curr_is_newline(ls)
    local c = ls.current
    if c == '\n' then
        return true
    end
    return c == '\r'
end
112 | 
-- Empty the save buffer (the text of the token being scanned).
local function resetbuf(ls)
    ls.save_buf = ""
end
116 | 
-- Move the content of the save buffer to the end of the space buffer,
-- leaving the save buffer empty.
local function resetbuf_tospace(ls)
    local pending = ls.save_buf
    ls.space_buf = ls.space_buf .. pending
    ls.save_buf = ""
end
121 | 
-- Append "str" to the space buffer.
local function spaceadd(ls, str)
    local buffered = ls.space_buf
    ls.space_buf = buffered .. str
end
125 | 
-- Append "c" to the save buffer.
local function save(ls, c)
    local buffered = ls.save_buf
    ls.save_buf = buffered .. c
end
129 | 
-- Append the current character to the space buffer, then advance.
local function savespace_and_next(ls)
    local c = ls.current
    ls.space_buf = ls.space_buf .. c
    nextchar(ls)
end
134 | 
-- Append the current character to the save buffer, then advance.
local function save_and_next(ls)
    local c = ls.current
    ls.save_buf = ls.save_buf .. c
    nextchar(ls)
end
139 | 
-- Return the save buffer without its first "init_skip" and its last
-- "end_skip" characters (typically the delimiters of a string).
local function get_string(ls, init_skip, end_skip)
    local first = init_skip + 1
    local last = -(end_skip + 1)
    return strsub(ls.save_buf, first, last)
end
143 | 
-- Return the content of the space buffer and leave the buffer empty.
local function get_space_string(ls)
    local collected = ls.space_buf
    ls.space_buf = ""
    return collected
end
149 | 
-- Consume one line break into the space buffer and increment the line
-- counter. A two-character break made of different newline characters
-- ("\r\n" or "\n\r") counts as a single line break.
local function inclinenumber(ls)
    local first = ls.current
    savespace_and_next(ls) -- skip `\n' or `\r'
    local second = ls.current
    if (second == '\n' or second == '\r') and second ~= first then
        savespace_and_next(ls) -- skip `\n\r' or `\r\n'
    end
    ls.linenumber = ls.linenumber + 1
end
158 | 
-- Scan a long bracket separator starting at the current '[' or ']',
-- saving the scanned characters. Return the number of '=' signs when the
-- run is followed by the same bracket, otherwise -(count + 1).
local function skip_sep(ls)
    local bracket = ls.current
    assert(bracket == '[' or bracket == ']')
    save_and_next(ls)
    local level = 0
    while ls.current == '=' do
        save_and_next(ls)
        level = level + 1
    end
    if ls.current == bracket then
        return level
    end
    return -level - 1
end
170 | 
-- Build a 64 bit integer from the leading decimal digits of "str", an
-- array of byte values. The result is unsigned when the third byte from
-- the end is 117 ('u'), signed otherwise.
local function build_64int(str)
    local x
    if str[#str - 2] == 117 then
        x = uint64(0)
    else
        x = int64(0)
    end
    local i = 1
    while true do
        local b = str[i]
        if b < ASCII_0 or b > ASCII_9 then break end
        x = 10 * x + (b - ASCII_0)
        i = i + 1
    end
    return x
end
181 | 
182 | -- Only lower case letters are accepted.
local function byte_to_hexdigit(b)
    -- Decimal digits map to 0..9, the letters 'a'..'f' to 10..15 and
    -- any other byte to -1.
    if b >= ASCII_0 and b <= ASCII_9 then
        return b - ASCII_0
    end
    if b >= ASCII_a and b <= ASCII_f then
        return 10 + (b - ASCII_a)
    end
    return -1
end
192 | 
-- Build a 64 bit integer from the hexadecimal digits of "str", an array
-- of byte values, starting at the third entry (the first two are skipped).
-- The result is unsigned when the third byte from the end is 117 ('u'),
-- signed otherwise. Scanning stops at the first byte that is not a
-- lower case hexadecimal digit.
local function build_64hex(str)
    local x
    if str[#str - 2] == 117 then
        x = uint64(0)
    else
        x = int64(0)
    end
    local i = 3
    while str[i] do
        local digit = byte_to_hexdigit(str[i])
        if digit < 0 then
            return x
        end
        x = 16 * x + digit
        i = i + 1
    end
    return x
end
205 | 
-- Convert "str" to an array with the byte value of each character.
-- Return nil as soon as a character that is not an identifier character
-- is found.
local function strnumdump(str)
    local bytes = {}
    for i = 1, #str do
        local ch = strsub(str, i, i)
        if not char_isident(ch) then
            return nil
        end
        bytes[i] = strbyte(ch)
    end
    return bytes
end
218 | 
-- Scan a numeric literal starting at the current character and return its
-- value: a Lua number, a 64 bit integer cdata for literals with the "ll"
-- or "ull" suffix, or a complex cdata for literals with the "i" suffix.
-- Raise a "malformed number" lexer error for anything else.
--
-- 64 bit literals are validated as a whole: they must consist of decimal
-- digits only (or "0x" followed by hexadecimal digits) and then the
-- suffix. Previously the builders silently stopped at the first
-- unexpected character, so that e.g. "1e5ll" was read as 1.
local function lex_number(ls)
    local lower = string.lower
    -- Exponent marker: 'e' for decimal literals, 'p' for hexadecimal ones.
    local xp = 'e'
    local c = ls.current
    if c == '0' then
        save_and_next(ls)
        local xc = ls.current
        if xc == 'x' or xc == 'X' then xp = 'p' end
    end
    -- Collect the literal in lower case. A sign is accepted only right
    -- after the exponent marker ("c" holds the previous character).
    while char_isident(ls.current) or ls.current == '.' or
        ((ls.current == '-' or ls.current == '+') and lower(c) == xp) do
        c = lower(ls.current)
        save(ls, c)
        nextchar(ls)
    end
    local str = ls.save_buf
    local x
    if strsub(str, -1, -1) == 'i' then
        -- Imaginary literal: the part before the suffix is a plain number.
        local img = tonumber(strsub(str, 1, -2))
        if img then x = complex(0, img) end
    elseif strsub(str, -2, -1) == 'll' then
        local is_hex = (xp == 'p')
        local shape = is_hex and '^0x%x+u?ll$' or '^%d+u?ll$'
        if string.match(str, shape) then
            local t = strnumdump(str)
            if t then
                if is_hex then
                    x = build_64hex(t)
                else
                    x = build_64int(t)
                end
            end
        end
    else
        x = tonumber(str)
    end
    if x then
        return x
    end
    lex_error(ls, 'TK_number', "malformed number")
end
253 | 
-- Scan the body of a long string or long comment whose opening separator
-- (with "sep" equal signs) has already been scanned up to the second `['.
-- When "ret_value" is true the content is accumulated and returned
-- without its delimiters; otherwise (long comment) nothing is returned.
-- Raise a lexer error if the stream ends before the closing bracket.
local function read_long_string(ls, sep, ret_value)
    save_and_next(ls) -- skip 2nd `['
    if curr_is_newline(ls) then -- string starts with a newline?
        inclinenumber(ls) -- skip it
    end
    while true do
        local c = ls.current
        if c == END_OF_STREAM then
            lex_error(ls, 'TK_eof', ret_value and "unfinished long string" or "unfinished long comment")
        elseif c == ']' then
            -- Only a closing separator of the same level ends the string.
            if skip_sep(ls) == sep then
                save_and_next(ls) -- skip 2nd `]'
                break
            end
        elseif c == '\n' or c == '\r' then
            -- Any kind of line break is stored as a single '\n'.
            save(ls, '\n')
            inclinenumber(ls)
            if not ret_value then
                resetbuf(ls) -- avoid wasting space
            end
        else
            if ret_value then save_and_next(ls)
            else nextchar(ls) end
        end
    end
    if ret_value then
        -- Strip the opening and closing brackets, each 2 + sep characters.
        return get_string(ls, 2 + sep, 2 + sep)
    end
end
283 | 
-- Single-letter escape sequences and the character each one stands for.
local Escapes = {
    a = '\a', b = '\b', f = '\f', n = '\n', r = '\r', t = '\t',
    v = '\v',
}
288 | 
-- Return the numeric value (0..15) of the hexadecimal digit "c", or
-- nothing when "c" does not start with a hexadecimal digit.
local function hex_char(c)
    if not string.match(c, '^%x') then
        return
    end
    -- The low four bits give the value of a digit; for a letter they give
    -- its position in the alphabet, hence the + 9.
    local value = band(strbyte(c), 15)
    if char_isdigit(c) then
        return value
    end
    return value + 9
end
296 | 
-- Process one escape sequence inside a quoted string. On entry the
-- current character is the backslash; the decoded character(s), if any,
-- are appended to the save buffer and the escape sequence is consumed.
-- Raise an "invalid escape sequence" lexer error for malformed escapes.
local function read_escape_char(ls)
    local c = nextchar(ls) -- Skip the '\\'.
    local esc = Escapes[c]
    if esc then
        save(ls, esc)
        nextchar(ls)
    elseif c == 'x' then -- Hexadecimal escape '\xXX'.
        local ch1 = hex_char(nextchar(ls))
        local hc
        if ch1 then
            local ch2 = hex_char(nextchar(ls))
            if ch2 then
                hc = strchar(ch1 * 16 + ch2)
            end
        end
        if not hc then
            lex_error(ls, 'TK_string', "invalid escape sequence")
        end
        save(ls, hc)
        nextchar(ls)
    elseif c == 'z' then -- Skip whitespace.
        nextchar(ls)
        while char_isspace(ls.current) do
            if curr_is_newline(ls) then inclinenumber(ls) else nextchar(ls) end
        end
    elseif c == '\n' or c == '\r' then
        -- Backslash followed by a line break: store a single '\n'.
        save(ls, '\n')
        inclinenumber(ls)
    elseif c == '\\' or c == '\"' or c == '\'' then
        save(ls, c)
        nextchar(ls)
    elseif c == END_OF_STREAM then
        -- Nothing to do here: the end of stream marker stays current.
    else
        if not char_isdigit(c) then
            lex_error(ls, 'TK_string', "invalid escape sequence")
        end
        local bc = band(strbyte(c), 15) -- Decimal escape '\ddd'.
        -- Up to two more digits are accepted; the value must fit a byte.
        if char_isdigit(nextchar(ls)) then
            bc = bc * 10 + band(strbyte(ls.current), 15)
            if char_isdigit(nextchar(ls)) then
                bc = bc * 10 + band(strbyte(ls.current), 15)
                if bc > 255 then
                    lex_error(ls, 'TK_string', "invalid escape sequence")
                end
                nextchar(ls)
            end
        end
        save(ls, strchar(bc))
    end
end
347 | 
-- Scan a quoted string delimited by "delim" (the current character on
-- entry) and return its content with escape sequences decoded. Raise an
-- "unfinished string" lexer error on a line break or at end of stream.
local function read_string(ls, delim)
    save_and_next(ls) -- keep the opening delimiter in the buffer
    while true do
        local c = ls.current
        if c == delim then
            break
        end
        if c == END_OF_STREAM then
            lex_error(ls, 'TK_eof', "unfinished string")
        elseif c == '\n' or c == '\r' then
            lex_error(ls, 'TK_string', "unfinished string")
        elseif c == '\\' then
            read_escape_char(ls)
        else
            save_and_next(ls)
        end
    end
    save_and_next(ls) -- skip delimiter
    return get_string(ls, 1, 1)
end
365 | 
-- Move the rest of the current line to the space buffer, stopping at the
-- line break (not consumed) or at the end of the stream.
local function skip_line(ls)
    while true do
        if curr_is_newline(ls) or ls.current == END_OF_STREAM then
            return
        end
        savespace_and_next(ls)
    end
end
371 | 
-- Scan and return the next token: its name followed, for names, strings
-- and numbers, by its value. Whitespace and comments met on the way are
-- accumulated in the space buffer.
--
-- The vertical tab ('\v') is skipped as whitespace like the other blank
-- characters; previously it was returned as a single-character token.
-- The backspace character is still skipped too, as before.
local function llex(ls)
    resetbuf(ls)
    while true do
        local current = ls.current
        if char_isident(current) then
            if char_isdigit(current) then -- Numeric literal.
                return 'TK_number', lex_number(ls)
            end
            -- Identifier or reserved word.
            repeat
                save_and_next(ls)
            until not char_isident(ls.current)
            local s = get_string(ls, 0, 0)
            local reserved = ReservedKeyword[s]
            if reserved then
                return 'TK_' .. s
            else
                return 'TK_name', s
            end
        end
        if current == '\n' or current == '\r' then
            inclinenumber(ls)
        elseif current == ' ' or current == '\t' or current == '\v'
            or current == '\f' or current == '\b' then
            savespace_and_next(ls)
        elseif current == '-' then
            nextchar(ls)
            if ls.current ~= '-' then return '-' end
            -- else is a comment
            nextchar(ls)
            spaceadd(ls, '--')
            if ls.current == '[' then
                local sep = skip_sep(ls)
                resetbuf_tospace(ls) -- `skip_sep' may dirty the buffer
                if sep >= 0 then
                    read_long_string(ls, sep, false) -- long comment
                    resetbuf_tospace(ls)
                else
                    skip_line(ls)
                end
            else
                skip_line(ls)
            end
        elseif current == '[' then
            local sep = skip_sep(ls)
            if sep >= 0 then
                local str = read_long_string(ls, sep, true)
                return 'TK_string', str
            elseif sep == -1 then
                return '['
            else
                lex_error(ls, 'TK_string', "delimiter error")
            end
        elseif current == '=' then
            nextchar(ls)
            if ls.current ~= '=' then return '=' else nextchar(ls); return 'TK_eq' end
        elseif current == '<' then
            nextchar(ls)
            if ls.current ~= '=' then return '<' else nextchar(ls); return 'TK_le' end
        elseif current == '>' then
            nextchar(ls)
            if ls.current ~= '=' then return '>' else nextchar(ls); return 'TK_ge' end
        elseif current == '~' then
            nextchar(ls)
            if ls.current ~= '=' then return '~' else nextchar(ls); return 'TK_ne' end
        elseif current == '*' then
            nextchar(ls)
            if ls.current ~= '*' then return '*' else nextchar(ls); return '**' end
        elseif current == '^' then
            nextchar(ls)
            if ls.current ~= '^' then return '^' else nextchar(ls); return '^^' end
        elseif current == ':' then
            nextchar(ls)
            if ls.current ~= ':' then return ':' else nextchar(ls); return 'TK_label' end
        elseif current == '"' or current == "'" then
            local str = read_string(ls, current)
            return 'TK_string', str
        elseif current == '.' then
            -- The dot is saved because it may start a number such as ".5".
            save_and_next(ls)
            if ls.current == '.' then
                nextchar(ls)
                if ls.current == '.' then
                    nextchar(ls)
                    return 'TK_dots' -- ...
                end
                return 'TK_concat' -- ..
            elseif not char_isdigit(ls.current) then
                return '.'
            else
                return 'TK_number', lex_number(ls)
            end
        elseif current == END_OF_STREAM then
            return 'TK_eof'
        else
            nextchar(ls)
            return current -- Single-char tokens (+ - / ...).
        end
    end
end
470 | 
-- Methods of lexer objects (see LexerClass); "next" and "lookahead" are
-- added below.
local Lexer = {
    token2str = token2str,
    error = lex_error,
}
475 | 
-- Advance to the next token, setting "token", "tokenval" and "space" in
-- the lexer state. A pending lookahead token is consumed first; otherwise
-- a new token is scanned. "lastline" is set to the line number on entry.
function Lexer.next(ls)
    ls.lastline = ls.linenumber
    if ls.tklookahead ~= 'TK_eof' then
        -- Use the token scanned in advance and clear the lookahead slot.
        ls.token, ls.tokenval = ls.tklookahead, ls.tklookaheadval
        ls.space = ls.spaceahead
        ls.tklookahead = 'TK_eof'
    else
        -- No lookahead token: scan a new one.
        ls.token, ls.tokenval = llex(ls)
        ls.space = get_space_string(ls)
    end
end
487 | 
-- Scan one token in advance, without making it the current one, and
-- return its name. Only one lookahead token can be pending at a time.
function Lexer.lookahead(ls)
    assert(ls.tklookahead == 'TK_eof')
    local token, value = llex(ls)
    ls.tklookahead, ls.tklookaheadval = token, value
    ls.spaceahead = get_space_string(ls)
    return token
end
494 | 
-- Metatable of lexer objects: missing fields are looked up in Lexer.
local LexerClass = { __index = Lexer }
496 | 
-- Create a lexer reading its input through "read_func", which is called
-- with the lexer state as argument and returns the next chunk of source
-- text or nil at the end. "chunkname" is used in error messages.
-- A leading UTF-8 byte order mark (when fully buffered) and a first line
-- starting with '#' are skipped.
-- The returned object always carries the Lexer methods, also when the
-- input ends inside the '#' line (it used to be returned without its
-- metatable in that case, making any later method call fail).
local function lex_setup(read_func, chunkname)
    local ls = {
        n = 0,
        tklookahead = 'TK_eof', -- No look-ahead token.
        linenumber = 1,
        lastline = 1,
        read_func = read_func,
        chunkname = chunkname,
        space_buf = ''
    }
    nextchar(ls)
    if ls.current == '\xef' and ls.n >= 2 and
        byte(ls, 0) == '\xbb' and byte(ls, 1) == '\xbf' then -- Skip UTF-8 BOM (if buffered).
        skip(ls, 2)
        nextchar(ls)
    end
    if ls.current == '#' then
        repeat
            nextchar(ls)
            if ls.current == END_OF_STREAM then
                return setmetatable(ls, LexerClass)
            end
        until curr_is_newline(ls)
        inclinenumber(ls)
    end
    return setmetatable(ls, LexerClass)
end
526 | 
527 | return lex_setup
528 | 


--------------------------------------------------------------------------------
/lua-ast.lua:
--------------------------------------------------------------------------------
-- Tag the table "node" with the given node "kind" and return that same
-- table (the table is modified in place).
local function build(kind, node)
    node.kind = kind
    return node
end
  5 | 
  6 | local function ident(name, line)
  7 |     return build("Identifier", { name = name, line = line })
  8 | end
  9 | 
 10 | local function does_multi_return(expr)
 11 |     local k = expr.kind
 12 |     return k == "CallExpression" or k == "SendExpression" or k == "Vararg"
 13 | end
 14 | 
 15 | local AST = { }
 16 | 
 17 | local function func_decl(id, body, params, vararg, locald, firstline, lastline)
 18 |     return build("FunctionDeclaration", {
 19 |         id         = id,
 20 |         body       = body,
 21 |         params     = params,
 22 |         vararg     = vararg,
 23 |         locald     = locald,
 24 |         firstline  = firstline,
 25 |         lastline   = lastline,
 26 |         line       = firstline,
 27 |     })
 28 | end
 29 | 
 30 | local function func_expr(body, params, vararg, firstline, lastline)
 31 |     return build("FunctionExpression", { body = body, params = params, vararg = vararg, firstline = firstline, lastline = lastline })
 32 | end
 33 | 
 34 | function AST.expr_function(ast, args, body, proto)
 35 |    return func_expr(body, args, proto.varargs, proto.firstline, proto.lastline)
 36 | end
 37 | 
 38 | function AST.local_function_decl(ast, name, args, body, proto)
 39 |     local id = ast:var_declare(name)
 40 |     return func_decl(id, body, args, proto.varargs, true, proto.firstline, proto.lastline)
 41 | end
 42 | 
 43 | function AST.function_decl(ast, path, args, body, proto)
 44 |    return func_decl(path, body, args, proto.varargs, false, proto.firstline, proto.lastline)
 45 | end
 46 | 
 47 | function AST.chunk(ast, body, chunkname, firstline, lastline)
 48 |     return build("Chunk", { body = body, chunkname = chunkname, firstline = firstline, lastline = lastline })
 49 | end
 50 | 
 51 | function AST.local_decl(ast, vlist, exps, line)
 52 |     local ids = {}
 53 |     for k = 1, #vlist do
 54 |         ids[k] = ast:var_declare(vlist[k])
 55 |     end
 56 |     return build("LocalDeclaration", { names = ids, expressions = exps, line = line })
 57 | end
 58 | 
 59 | function AST.assignment_expr(ast, vars, exps, line)
 60 |     return build("AssignmentExpression", { left = vars, right = exps, line = line })
 61 | end
 62 | 
 63 | function AST.assignment_algebra_expr(ast, vars, exps, line)
 64 |     return build("AssignmentAlgebraExpression", { left = vars, right = exps, line = line })
 65 | end
 66 | 
 67 | function AST.expr_index(ast, v, index, line)
 68 |     return build("MemberExpression", { object = v, property = index, computed = true, line = line })
 69 | end
 70 | 
 71 | function AST.expr_algebra_index(ast, v, line)
 72 |     return build("IndexAlgebraExpression", { object = v, line = line })
 73 | end
 74 | 
 75 | function AST.expr_property(ast, v, prop, line)
 76 |     local index = ident(prop, line)
 77 |     return build("MemberExpression", { object = v, property = index, computed = false, line = line })
 78 | end
 79 | 
 80 | function AST.literal(ast, val)
 81 |     return build("Literal", { value = val })
 82 | end
 83 | 
 84 | function AST.expr_vararg(ast)
 85 |     return build("Vararg", { })
 86 | end
 87 | 
 88 | function AST.expr_brackets(ast, expr)
 89 |     expr.bracketed = true
 90 |     return expr
 91 | end
 92 | 
 93 | function AST.set_expr_last(ast, expr)
 94 |     if expr.bracketed and does_multi_return(expr) then
 95 |         expr.bracketed = nil
 96 |         return build("ExpressionValue", { value = expr })
 97 |     else
 98 |         return expr
 99 |     end
100 | end
101 | 
-- Node for a table constructor.
function AST.expr_table(ast, keyvals, line)
    local node = { keyvals = keyvals, line = line }
    return build("Table", node)
end
105 | 
-- Node for the unary operator "op" applied to "v".
function AST.expr_unop(ast, op, v, line)
    local node = { operator = op, argument = v, line = line }
    return build("UnaryExpression", node)
end
109 | 
-- Same fields as expr_unop, tagged as an algebra expression.
function AST.expr_algebra_unop(ast, op, v, line)
    local node = { operator = op, argument = v, line = line }
    return build("UnaryAlgebraExpression", node)
end
113 | 
-- Append "node" to the list "ts". A concatenation node contributes each of
-- its terms instead of itself, so nested concatenations are flattened.
local function concat_append(ts, node)
    if node.kind ~= "ConcatenateExpression" then
        ts[#ts + 1] = node
        return
    end
    local offset = #ts
    local terms = node.terms
    for k = 1, #terms do
        ts[offset + k] = terms[k]
    end
end
122 | 
-- Node for a binary operation. "and"/"or" give a "LogicalExpression", ".."
-- gives a flattened "ConcatenateExpression" (which takes the line of its
-- first operand) and any other operator a "BinaryExpression".
function AST.expr_binop(ast, op, expa, expb, line)
    if op == '..' then
        local terms = { }
        concat_append(terms, expa)
        concat_append(terms, expb)
        return build("ConcatenateExpression", { terms = terms, line = expa.line })
    end
    local kind = "BinaryExpression"
    if op == 'and' or op == 'or' then
        kind = "LogicalExpression"
    end
    return build(kind, { operator = op, left = expa, right = expb, line = line })
end
138 | 
-- Node for a binary algebra operation. The operators "..", "and" and
-- "or" are not supported and raise an error.
function AST.expr_algebra_binop(ast, op, expa, expb, line)
    if op == '..' or op == 'and' or op == 'or' then
        error('not yet implemented')
    end
    local node = { operator = op, left = expa, right = expb, line = line }
    return build("BinaryAlgebraExpression", node)
end
151 | 
-- Identifier node for "name", without line information.
function AST.identifier(ast, name)
    local node = ident(name)
    return node
end
155 | 
-- Node for a method call, i.e. v:key(args).
function AST.expr_method_call(ast, v, key, args, line)
    local node = {
        receiver  = v,
        method    = ident(key),
        arguments = args,
        line      = line,
    }
    return build("SendExpression", node)
end
160 | 
-- Node for a function call, i.e. v(args).
function AST.expr_function_call(ast, v, args, line)
    local node = { callee = v, arguments = args, line = line }
    return build("CallExpression", node)
end
164 | 
-- Node for a "return" statement with the given expression list.
function AST.return_stmt(ast, exps, line)
    local node = { arguments = exps, line = line }
    return build("ReturnStatement", node)
end
168 | 
-- Node for a "break" statement.
function AST.break_stmt(ast, line)
    local node = { line = line }
    return build("BreakStatement", node)
end
172 | 
-- Node for a label statement.
function AST.label_stmt(ast, name, line)
    local node = { label = name, line = line }
    return build("LabelStatement", node)
end
176 | 
-- Node for an expression used as a statement.
function AST.new_statement_expr(ast, expr, line)
    local node = { expression = expr, line = line }
    return build("ExpressionStatement", node)
end
180 | 
-- Node for an "if" statement: "tests" and "cons" hold the conditions and
-- their blocks, "else_branch" the optional else block.
function AST.if_stmt(ast, tests, cons, else_branch, line)
    local node = { tests = tests, cons = cons, alternate = else_branch, line = line }
    return build("IfStatement", node)
end
184 | 
-- Node for a "do ... end" block.
function AST.do_stmt(ast, body, line, lastline)
    local node = { body = body, line = line, lastline = lastline }
    return build("DoStatement", node)
end
188 | 
-- Build a "WhileStatement" node with condition `test` and block `body`.
function AST.while_stmt(ast, test, body, line, lastline)
    local fields = {
        test = test,
        body = body,
        line = line,
        lastline = lastline,
    }
    return build("WhileStatement", fields)
end
192 | 
-- Build a "RepeatStatement" node with block `body` and exit condition `test`.
function AST.repeat_stmt(ast, test, body, line, lastline)
    local fields = {
        test = test,
        body = body,
        line = line,
        lastline = lastline,
    }
    return build("RepeatStatement", fields)
end
196 | 
-- Build a numeric "ForStatement" node.
-- The loop variable and its start value are wrapped in a "ForInit" node;
-- `last` and `step` are stored as given.
function AST.for_stmt(ast, var, init, last, step, body, line, lastline)
    local fields = {
        init = build("ForInit", { id = var, value = init, line = line }),
        last = last,
        step = step,
        body = body,
        line = line,
        lastline = lastline,
    }
    return build("ForStatement", fields)
end
201 | 
-- Build a "ForInStatement" node.
-- The loop variables `vars` are wrapped in a "ForNames" node; `exps` is the
-- iterator expression list.
function AST.for_iter_stmt(ast, vars, exps, body, line, lastline)
    local fields = {
        namelist = build("ForNames", { names = vars, line = line }),
        explist = exps,
        body = body,
        line = line,
        lastline = lastline,
    }
    return build("ForInStatement", fields)
end
206 | 
-- Build a "GotoStatement" node jumping to the label called `name`.
function AST.goto_stmt(ast, name, line)
    local fields = { label = name, line = line }
    return build("GotoStatement", fields)
end
210 | 
-- Create a scope record: an empty `vars` table plus a link to the
-- enclosing scope (`parent_scope`, possibly nil).
local function new_scope(parent_scope)
    local scope = { vars = { } }
    scope.parent = parent_scope
    return scope
end
217 | 
-- Create an identifier node for `name` and record the name in the `vars`
-- table of the current scope (`ast.current`).
function AST.var_declare(ast, name)
    local id = ident(name)
    local scope = ast.current
    scope.vars[name] = true
    return id
end
223 | 
-- Open a new scope whose parent is the current one and make it current.
function AST.fscope_begin(ast)
    local enclosing = ast.current
    ast.current = new_scope(enclosing)
end
227 | 
-- Close the current scope: its parent becomes the current scope again.
function AST.fscope_end(ast)
    local closing = ast.current
    ast.current = closing.parent
end
231 | 
-- Metatable that makes every AST builder function available as a method.
local ASTClass = { __index = AST }

-- Create a new, empty AST builder object.
local function new_ast()
    local instance = { }
    return setmetatable(instance, ASTClass)
end

return { New = new_ast }
239 | 


--------------------------------------------------------------------------------
/luacode-generator.lua:
--------------------------------------------------------------------------------
  1 | --
  2 | -- luacode-generator.lua
  3 | --
  4 | -- This file is part of the LuaJIT Language Toolkit.
  5 | --
  6 | -- Module to generate the Lua code that corresponds to a given Lua AST Tree.
  7 | -- Can be used as an alternative to the bytecode generator.
  8 | 
  9 | local operator = require("sci-lang.operator")
 10 | 
 11 | local strbyte, strsub = string.byte, string.sub
 12 | 
 13 | local LuaReservedKeyword = {['and'] = 1, ['break'] = 2, ['do'] = 3, ['else'] = 4, ['elseif'] = 5, ['end'] = 6, ['false'] = 7, ['for'] = 8, ['function'] = 9, ['goto'] = 10, ['if'] = 11, ['in'] = 12, ['local'] = 13, ['nil'] = 14, ['not'] = 15, ['or'] = 16, ['repeat'] = 17, ['return'] = 18, ['then'] = 19, ['true'] = 20, ['until'] = 21, ['while'] = 22 }
 14 | 
 15 | local ASCII_0, ASCII_9 = 48, 57
 16 | local ASCII_a, ASCII_z = 97, 122
 17 | local ASCII_A, ASCII_Z = 65, 90
 18 | 
 19 | local function char_isletter(c)
 20 |     local b = strbyte(c)
 21 |     if b >= ASCII_a and b <= ASCII_z then
 22 |         return true
 23 |     elseif b >= ASCII_A and b <= ASCII_Z then
 24 |         return true
 25 |     else
 26 |         return (c == '_')
 27 |     end
 28 | end
 29 | 
 30 | local function char_isdigit(c)
 31 |     local b = strbyte(c)
 32 |     return b >= ASCII_0 and b <= ASCII_9
 33 | end
 34 | 
 35 | local function replace_cc(c)
 36 |     local esc = {
 37 |         ['\a'] = [[\a]], ['\b'] = [[\b]], ['\f'] = [[\f]], ['\n'] = [[\n]], ['\r'] = [[\r]], ['\t'] = [[\t]], ['\v'] = [[\v]]
 38 |     }
 39 |     return esc[c] and esc[c] or ('\\' .. string.format("%d", string.byte(c)))
 40 | end
 41 | 
 42 | local function escape(s)
 43 |     s = string.gsub(s, "[\"\\]", "\\%1")
 44 |     return string.gsub(s, "%c", replace_cc)
 45 | end
 46 | 
-- Handler tables indexed by AST node kind: one for statements, one for
-- expressions.
local StatementRule = { }
local ExpressionRule = { }

-- Local aliases for the library functions used throughout this file.
local concat = table.concat
local format = string.format
 52 | 
 53 | local function is_string(node)
 54 |     return node.kind == "Literal" and type(node.value) == "string"
 55 | end
 56 | 
 57 | local function is_const(node, val)
 58 |     return node.kind == "Literal" and node.value == val
 59 | end
 60 | 
 61 | local function is_literal(node)
 62 |     local k = node.kind
 63 |     return (k == "Literal" or k == "Table")
 64 | end
 65 | 
 66 | local function string_is_ident(str)
 67 |     local c = strsub(str, 1, 1)
 68 |     if c == '' or not char_isletter(c) then
 69 |         return false
 70 |     end
 71 |     for k = 2, #str do
 72 |         c = strsub(str, k, k)
 73 |         if not char_isletter(c) and not char_isdigit(c) then
 74 |             return false
 75 |         end
 76 |     end
 77 |     return not LuaReservedKeyword[str]
 78 | end
 79 | 
 80 | local function comma_sep_list(ls, f)
 81 |     local strls
 82 |     if f then
 83 |         strls = { }
 84 |         for k = 1, #ls do strls[k] = f(ls[k]) end
 85 |     else
 86 |         strls = ls
 87 |     end
 88 |     return concat(strls, ", ")
 89 | end
 90 | 
 91 | local function as_parameter(node)
 92 |     return node.kind == "Vararg" and "..." or node.name
 93 | end
 94 | 
 95 | function ExpressionRule:Identifier(node)
 96 |     return node.name, operator.ident_priority
 97 | end
 98 | 
 99 | function ExpressionRule:Literal(node)
100 |     local val = node.value
101 |     local str = type(val) == "string" and format("\"%s\"", escape(val)) or tostring(val)
102 |     return str, operator.ident_priority
103 | end
104 | 
-- Emit an indexing expression, `obj[prop]` when the node is `computed` and
-- `obj.name` otherwise. The object is parenthesized when its priority is
-- below the identifier priority or when it is a literal / table node.
function ExpressionRule:MemberExpression(node)
    local target = node.object
    local object, prio = self:expr_emit(target)
    local ident_prio = operator.ident_priority
    if prio < ident_prio or is_literal(target) then
        object = format("(%s)", object)
    end
    if node.computed then
        local prop = self:expr_emit(node.property)
        return format("%s[%s]", object, prop), ident_prio
    end
    return format("%s.%s", object, node.property.name), ident_prio
end
119 | 
-- Emit the vararg expression "..." with the identifier priority.
function ExpressionRule:Vararg()
    local dots = "..."
    return dots, operator.ident_priority
end
123 | 
-- Emit the wrapped expression between parentheses.
-- Like the other expression rules it also returns a priority, so that a
-- parent rule comparing priorities never receives nil; a parenthesized
-- expression is reported with the identifier priority.
function ExpressionRule:ExpressionValue(node)
    local inner = self:expr_emit(node.value)
    return "(" .. inner .. ")", operator.ident_priority
end
127 | 
-- Emit `left op right`, adding parentheses around an operand when its
-- priority is too low with respect to the operator.
-- Returns the text followed by the operator's left and right priorities.
function ExpressionRule:BinaryExpression(node)
    local op = node.operator
    local op_left = operator.left_priority(op)
    local op_right = operator.right_priority(op)
    local lhs, lhs_lprio, lhs_rprio = self:expr_emit(node.left)
    local rhs, rhs_lprio = self:expr_emit(node.right)
    -- Rules returning a single priority use it for both sides.
    lhs_rprio = lhs_rprio or lhs_lprio
    if lhs_rprio < op_left then
        lhs = "(" .. lhs .. ")"
    end
    if rhs_lprio <= op_right then
        rhs = "(" .. rhs .. ")"
    end
    return format("%s %s %s", lhs, op, rhs), op_left, op_right
end
140 | 
-- Emit `op argument`, separated by a space. The argument is parenthesized
-- when its priority is lower than the unary operator's.
-- Returns the text and the unary operator priority.
function ExpressionRule:UnaryExpression(node)
    local operand, operand_prio = self:expr_emit(node.argument)
    local op = node.operator
    local op_prio = operator.unary_priority(op)
    if operand_prio < op_prio then
        operand = "(" .. operand .. ")"
    end
    return format("%s %s", op, operand), op_prio
end
147 | 
-- "LogicalExpression" nodes are emitted by the same rule as binary
-- expressions.
ExpressionRule.LogicalExpression = ExpressionRule.BinaryExpression
149 | 
-- Emit the terms of a concatenation joined by " .. ". A term is
-- parenthesized when its priority is lower than the one of "..".
-- Returns the text and the left priority of "..".
function ExpressionRule:ConcatenateExpression(node)
    local terms = node.terms
    local cat_prio = operator.left_priority("..")
    local pieces = { }
    for k = 1, #terms do
        local text, prio = self:expr_emit(terms[k])
        if prio < cat_prio then
            text = "(" .. text .. ")"
        end
        pieces[k] = text
    end
    return concat(pieces, " .. "), cat_prio
end
160 | 
-- Emit a table constructor. Each entry of `node.keyvals` is a
-- { value, key } pair where the key may be absent:
--   * no key                      -> `value`
--   * identifier-like string key  -> `name = value`
--   * any other key               -> `[key] = value`
-- Returns the text and the identifier priority.
function ExpressionRule:Table(node)
    local keyvals = node.keyvals
    local items = { }
    for i = 1, #keyvals do
        local kv = keyvals[i]
        local val = self:expr_emit(kv[1])
        local key = kv[2]
        if not key then
            items[i] = format("%s", val)
        elseif is_string(key) and string_is_ident(key.value) then
            items[i] = format("%s = %s", key.value, val)
        else
            items[i] = format("[%s] = %s", self:expr_emit(key), val)
        end
    end
    -- An empty list joins to the empty string, giving "{}".
    return "{" .. comma_sep_list(items) .. "}", operator.ident_priority
end
183 | 
-- Emit a function call `callee(args)`. The callee is parenthesized when
-- its priority is below the identifier priority.
function ExpressionRule:CallExpression(node)
    local ident_prio = operator.ident_priority
    local callee, prio = self:expr_emit(node.callee)
    if prio < ident_prio then
        callee = format("(%s)", callee)
    end
    local args = self:expr_list(node.arguments)
    return format("%s(%s)", callee, args), ident_prio
end
192 | 
-- Emit a method call `receiver:method(args)`. The receiver is parenthesized
-- when its priority is below the identifier priority or when it is a
-- literal / table node.
function ExpressionRule:SendExpression(node)
    local ident_prio = operator.ident_priority
    local receiver = node.receiver
    local rec, prio = self:expr_emit(receiver)
    if prio < ident_prio or is_literal(receiver) then
        rec = format("(%s)", rec)
    end
    local method = node.method.name
    local args = self:expr_list(node.arguments)
    return format("%s:%s(%s)", rec, method, args), ident_prio
end
202 | 
-- Emit a (possibly local) function declaration.
-- The function is generated in its own proto starting at indentation 0 and
-- then merged, line by line, into the enclosing proto.
function StatementRule:FunctionDeclaration(node)
    self:proto_enter(0)
    local name = self:expr_emit(node.id)
    local params = comma_sep_list(node.params, as_parameter)
    local keyword = "function"
    if node.locald then
        keyword = "local function"
    end
    self:add_section(format("%s %s(%s)", keyword, name, params), node.body)
    -- Leave the child proto first: the merge target is the parent proto.
    local child_proto = self:proto_leave()
    self.proto:merge(child_proto)
end
214 | 
-- Emit an anonymous function. Its code is generated in a nested proto and
-- returned as a single multi-line string, with priority 0.
function ExpressionRule:FunctionExpression(node)
    self:proto_enter()
    local params = comma_sep_list(node.params, as_parameter)
    self:add_section(format("function(%s)", params), node.body)
    local child_proto = self:proto_leave()
    return child_proto:inline(), 0
end
222 | 
-- Emit a call expression used as a statement, on a line of its own.
function StatementRule:CallExpression(node)
    -- Parentheses keep only the text, dropping the priority.
    self:add_line((self:expr_emit(node)))
end
227 | 
-- Emit a numeric for loop. The step is omitted from the header when it is
-- absent or the literal 1.
function StatementRule:ForStatement(node)
    local init = node.init
    local first = self:expr_emit(init.value)
    local last = self:expr_emit(node.last)
    local range = format("%s, %s", first, last)
    local step_node = node.step
    if step_node and not is_const(step_node, 1) then
        local step = self:expr_emit(step_node)
        range = format("%s, %s", range, step)
    end
    local header = format("for %s = %s do", init.id.name, range)
    self:add_section(header, node.body)
end
241 | 
-- Emit a generic for loop: `for names in explist do ... end`.
function StatementRule:ForInStatement(node)
    local names = comma_sep_list(node.namelist.names, as_parameter)
    local iterators = self:expr_list(node.explist)
    self:add_section(format("for %s in %s do", names, iterators), node.body)
end
248 | 
-- Emit a `do ... end` block.
function StatementRule:DoStatement(node)
    local body = node.body
    self:add_section("do", body)
end
252 | 
-- Emit a while loop: `while test do ... end`.
function StatementRule:WhileStatement(node)
    local cond = self:expr_emit(node.test)
    self:add_section(format("while %s do", cond), node.body)
end
258 | 
-- Emit a repeat loop. The section is emitted without the closing "end";
-- the "until <test>" line closes it instead.
function StatementRule:RepeatStatement(node)
    self:add_section("repeat", node.body, true)
    local cond = self:expr_emit(node.test)
    self:add_line(format("until %s", cond))
end
265 | 
-- Emit a `break` statement on a line of its own.
function StatementRule:BreakStatement()
    local keyword = "break"
    self:add_line(keyword)
end
269 | 
-- Emit an if / elseif / else chain. Each branch is emitted without its own
-- "end"; a single "end" closes the whole statement.
function StatementRule:IfStatement(node)
    local tests, blocks = node.tests, node.cons
    local keyword = "if"
    for i = 1, #tests do
        local cond = self:expr_emit(tests[i])
        self:add_section(format("%s %s then", keyword, cond), blocks[i], true)
        keyword = "elseif"
    end
    local otherwise = node.alternate
    if otherwise then
        self:add_section("else", otherwise, true)
    end
    self:add_line("end")
end
283 | 
-- Emit a local declaration, with the ` = expressions` part only when there
-- is at least one initializer.
function StatementRule:LocalDeclaration(node)
    local names = comma_sep_list(node.names, as_parameter)
    local values = node.expressions
    if #values == 0 then
        self:add_line(format("local %s", names))
        return
    end
    self:add_line(format("local %s = %s", names, self:expr_list(values)))
end
294 | 
-- Emit an assignment: `targets = values`.
function StatementRule:AssignmentExpression(node)
    local targets = self:expr_list(node.left)
    local values = self:expr_list(node.right)
    self:add_line(format("%s = %s", targets, values))
end
299 | 
-- Emit a chunk: every statement of its body, in order.
function StatementRule:Chunk(node)
    local statements = node.body
    self:list_emit(statements)
end
303 | 
-- Emit an expression used as a statement, on a line of its own.
function StatementRule:ExpressionStatement(node)
    -- Parentheses keep only the text, dropping the priority.
    self:add_line((self:expr_emit(node.expression)))
end
308 | 
-- Emit a return statement followed by its comma separated values.
function StatementRule:ReturnStatement(node)
    local values = self:expr_list(node.arguments)
    self:add_line(format("return %s", values))
end
313 | 
-- Emit a label: `::name::`.
function StatementRule:LabelStatement(node)
    local text = "::" .. node.label .. "::"
    self:add_line(text)
end
317 | 
-- Emit a goto statement: `goto name`.
function StatementRule:GotoStatement(node)
    local text = "goto " .. node.label
    self:add_line(text)
end
321 | 
-- Return the code of `proto` as one string, lines joined by newlines.
-- The leading whitespace of the first line is removed (in place, in
-- `proto.code`).
local function proto_inline(proto)
    local code = proto.code
    if #code > 0 then
        -- Parentheses drop the substitution count returned by gsub.
        code[1] = (string.gsub(code[1], "^%s*", ""))
    end
    return concat(code, "\n")
end
329 | 
-- Append every line of `child` to `proto`, prefixing each one with the
-- current indentation of `proto` (four spaces per level).
local function proto_merge(proto, child)
    -- The indentation is the same for all the lines: build it once.
    local indent_str = string.rep("    ", proto.indent)
    local target, source = proto.code, child.code
    for k = 1, #source do
        target[#target + 1] = indent_str .. source[k]
    end
end
337 | 
-- Create a proto: a list of code lines with an indentation level and a
-- link to its parent proto.
-- The indentation is `indent` when given, else the parent's, else 0.
local function proto_new(parent, indent)
    local level
    if indent then
        level = indent
    elseif parent then
        level = parent.indent
    else
        level = 0
    end
    return {
        code = { },
        indent = level,
        parent = parent,
        inline = proto_inline,
        merge = proto_merge,
    }
end
350 | 
-- Generate the Lua source code corresponding to the AST `tree`.
-- A generator object ("self") is created with the methods used by the
-- StatementRule / ExpressionRule handlers, the tree is emitted and the
-- resulting code is returned as a single string.
-- The `name` argument is currently not used.
local function generate(tree, name)

    local self = { }
    self.proto = proto_new()
    self.chunkname = tree.chunkname

    -- Start emitting into a new proto nested in the current one.
    function self:proto_enter(indent)
        self.proto = proto_new(self.proto, indent)
    end

    -- Stop emitting into the current proto and return it; its parent
    -- becomes the current proto again.
    function self:proto_leave()
        local proto = self.proto
        self.proto = proto.parent
        return proto
    end

    local function to_expr(node)
        return self:expr_emit(node)
    end

    -- Return the lines emitted so far in the current proto, joined by
    -- newlines. (The lines live in the proto: the generator object itself
    -- has no `code` field.)
    function self:compile_code()
        return concat(self.proto.code, "\n")
    end

    function self:indent_more()
        local proto = self.proto
        proto.indent = proto.indent + 1
    end

    function self:indent_less()
        local proto = self.proto
        proto.indent = proto.indent - 1
    end

    function self:line(line)
        -- FIXME: ignored for the moment
    end

    -- Append `line` to the current proto at the current indentation.
    function self:add_line(line)
        local proto = self.proto
        local indent = string.rep("    ", proto.indent)
        proto.code[#proto.code + 1] = indent .. line
    end

    -- Emit `header`, then the statements of `body` one level deeper, then
    -- a closing "end" unless `omit_end` is set.
    function self:add_section(header, body, omit_end)
        self:add_line(header)
        self:indent_more()
        self:list_emit(body)
        self:indent_less()
        if not omit_end then
            self:add_line("end")
        end
    end

    -- Dispatch an expression node to its rule and return the rule's results.
    function self:expr_emit(node)
        local rule = ExpressionRule[node.kind]
        if not rule then error("cannot find an expression rule for " .. node.kind) end
        return rule(self, node)
    end

    function self:expr_list(exps)
        return comma_sep_list(exps, to_expr)
    end

    -- Dispatch a statement node to its rule.
    function self:emit(node)
        local rule = StatementRule[node.kind]
        if not rule then error("cannot find a statement rule for " .. node.kind) end
        rule(self, node)
        if node.line then self:line(node.line) end
    end

    function self:list_emit(node_list)
        for i = 1, #node_list do
            self:emit(node_list[i])
        end
    end

    self:emit(tree)

    return self:proto_leave():inline()
end
432 | 
433 | return generate
434 | 


--------------------------------------------------------------------------------
/operator.lua:
--------------------------------------------------------------------------------
 1 | -- Priorities for each binary operator.
 2 | -- (left priority) * 256 + (right priority)
 3 | -- modulus is your friend
 4 | local binop = {
 5 |     ['+']  = 6 * 256 + 6, ['-']  = 6 * 256 + 6, ['*'] = 7 * 256 + 7, ['/'] = 7 * 256 + 7, ['%'] = 7 * 256 + 7,
 6 |     ['^']  = 10* 256 + 9, ['..'] = 5 * 256 + 4, -- POW CONCAT (right associative)
 7 |     ['=='] = 3 * 256 + 3, ['~='] = 3 * 256 + 3,
 8 |     ['<']  = 3 * 256 + 3, ['>='] = 3 * 256 + 3, ['>'] = 3 * 256 + 3, ['<='] = 3 * 256 + 3,
 9 |     ['and']= 2 * 256 + 2, ['or'] = 1 * 256 + 1,
10 |     ['**'] = 10* 256 + 10,
11 |     ['^^'] = 12* 256 + 11, -- right associative
12 | }
13 | 
-- Priorities of the unary operators.
local unaop = {
    ['not'] = 8,
    ['-']   = 8,
    ['#']   = 8,
    ['`']   = 13, -- highest
}
20 | 
-- Priority of the unary operator `op`, or nil when `op` is not a known
-- unary operator.
local function unary_priority(op)
    local prio = unaop[op]
    return prio
end
24 | 
-- Pseudo priority of a simple identifier. It should be higher than the
-- priority of any operator.
local ident_priority = 16
28 | 
-- Truthy when `op` is a known binary operator: returns its packed
-- priority entry, or nil for anything else.
local function is_binop(op)
    local entry = binop[op]
    return entry
end
32 | 
-- Left priority of the binary operator `op`: the packed entry shifted
-- right by 8 bits.
local function left_priority(op)
    local packed = binop[op]
    return bit.rshift(packed, 8)
end
36 | 
-- Right priority of the binary operator `op`: the low 8 bits of the
-- packed entry.
local function right_priority(op)
    local packed = binop[op]
    return bit.band(packed, 0xff)
end
40 | 
-- Module interface: operator predicates and priorities.
return {
    is_binop       = is_binop,
    left_priority  = left_priority,
    right_priority = right_priority,
    unary_priority = unary_priority,
    ident_priority = ident_priority,
}
48 | 


--------------------------------------------------------------------------------
/parser.lua:
--------------------------------------------------------------------------------
  1 | local operator = require("sci-lang.operator")
  2 | 
  3 | local LJ_52 = false
  4 | 
  5 | local EndOfBlock = { TK_else = true, TK_elseif = true, TK_end = true, TK_until = true, TK_eof = true }
  6 | 
  7 | local is_algebra_map = {
  8 |     BinaryAlgebraExpression = true,
  9 |     UnaryAlgebraExpression  = true,
 10 |     IndexAlgebraExpression  = true,
 11 | }
 12 | 
 13 | local function is_algebra(node)
 14 |     return is_algebra_map[node.kind]
 15 | end
 16 | 
 17 | local function err_syntax(ls, em)
 18 |   ls:error(ls.token, em)
 19 | end
 20 | 
 21 | local function err_token(ls, token)
 22 |   ls:error(ls.token, "'%s' expected", ls.token2str(token))
 23 | end
 24 | 
 25 | local function checkcond(ls, cond, em)
 26 |     if not cond then err_syntax(ls, em) end
 27 | end
 28 | 
 29 | local function lex_opt(ls, tok)
 30 |     if ls.token == tok then
 31 |         ls:next()
 32 |         return true
 33 |     end
 34 |     return false
 35 | end
 36 | 
 37 | local function lex_check(ls, tok)
 38 |     if ls.token ~= tok then err_token(ls, tok) end
 39 |     ls:next()
 40 | end
 41 | 
 42 | local function lex_match(ls, what, who, line)
 43 |     if not lex_opt(ls, what) then
 44 |         if line == ls.linenumber then
 45 |             err_token(ls, what)
 46 |         else
 47 |             local token2str = ls.token2str
 48 |             ls:error(ls.token, "%s expected (to close %s at line %d)", token2str(what), token2str(who), line)
 49 |         end
 50 |     end
 51 | end
 52 | 
 53 | local function lex_str(ls)
 54 |     if ls.token ~= 'TK_name' and (LJ_52 or ls.token ~= 'TK_goto') then
 55 |         err_token(ls, 'TK_name')
 56 |     end
 57 |     local s = ls.tokenval
 58 |     ls:next()
 59 |     return s
 60 | end
 61 | 
 62 | local expr_primary, expr, expr_unop, expr_binop, expr_simple
 63 | local expr_list, expr_table
 64 | local parse_body, parse_block, parse_args
 65 | 
 66 | local function var_lookup(ast, ls)
 67 |     local name = lex_str(ls)
 68 |     return ast:identifier(name)
 69 | end
 70 | 
 71 | local function expr_field(ast, ls, v)
 72 |     ls:next() -- Skip dot or colon.
 73 |     local key = lex_str(ls)
 74 |     return ast:expr_property(v, key)
 75 | end
 76 | 
 77 | local function expr_bracket(ast, ls, accept_empty)
 78 |     ls:next() -- Skip '['.
 79 |     local v 
 80 |     if accept_empty and lex_opt(ls, ']') then
 81 |         v = nil
 82 |     else
 83 |         v = expr(ast, ls)
 84 |         lex_check(ls, ']')
 85 |     end
 86 |     return v
 87 | end
 88 | 
 89 | function expr_table(ast, ls)
 90 |     local line = ls.linenumber
 91 |     local kvs = {}
 92 |     lex_check(ls, '{')
 93 |     while ls.token ~= '}' do
 94 |         local key
 95 |         if ls.token == '[' then
 96 |             key = expr_bracket(ast, ls)
 97 |             lex_check(ls, '=')
 98 |         elseif (ls.token == 'TK_name' or (not LJ_52 and ls.token == 'TK_goto')) and ls:lookahead() == '=' then
 99 |             local name = lex_str(ls)
100 |             key = ast:literal(name)
101 |             lex_check(ls, '=')
102 |         end
103 |         local val = expr(ast, ls)
104 |         kvs[#kvs + 1] = { val, key } -- "key" can be nil.
105 |         if not lex_opt(ls, ',') and not lex_opt(ls, ';') then break end
106 |     end
107 |     lex_match(ls, '}', '{', line)
108 |     return ast:expr_table(kvs, line)
109 | end
110 | 
-- Parse a simple expression: a constant, "...", a table constructor, an
-- anonymous function or, failing those, a primary expression.
function expr_simple(ast, ls)
    local tk, val = ls.token, ls.tokenval
    -- Forms that consume their own tokens.
    if tk == '{' then
        return expr_table(ast, ls)
    elseif tk == 'TK_function' then
        ls:next()
        local args, body, proto = parse_body(ast, ls, ls.linenumber, false)
        return ast:expr_function(args, body, proto)
    end
    -- Single token forms: build the node, then skip the token.
    local e
    if tk == 'TK_number' or tk == 'TK_string' then
        e = ast:literal(val)
    elseif tk == 'TK_nil' then
        e = ast:literal(nil)
    elseif tk == 'TK_true' then
        e = ast:literal(true)
    elseif tk == 'TK_false' then
        e = ast:literal(false)
    elseif tk == 'TK_dots' then
        if not ls.fs.varargs then
            err_syntax(ls, "cannot use \"...\" outside a vararg function")
        end
        e = ast:expr_vararg()
    else
        return expr_primary(ast, ls)
    end
    ls:next()
    return e
end
141 | 
-- Parse a comma separated list of expressions and return them as a list.
-- The last expression is passed through `ast:set_expr_last`.
function expr_list(ast, ls)
    local exps = { }
    repeat
        exps[#exps + 1] = expr(ast, ls)
    until not lex_opt(ls, ',')
    local last = #exps
    if last > 0 then
        exps[last] = ast:set_expr_last(exps[last])
    end
    return exps
end
154 | 
-- Parse a unary expression: a prefix operator ('not', '-', '#') applied to
-- its operand, or a simple expression optionally followed by the postfix
-- '`' operator.
function expr_unop(ast, ls)
    local tk = ls.token
    if tk ~= 'TK_not' and tk ~= '-' and tk ~= '#' then
        local exp = expr_simple(ast, ls)
        if lex_opt(ls, '`') then
            local line = ls.linenumber
            exp = ast:expr_algebra_unop('`', exp, line)
        end
        return exp
    end
    local line = ls.linenumber
    ls:next()
    local op = ls.token2str(tk)
    local v = expr_binop(ast, ls, operator.unary_priority(op))
    if not is_algebra(v) then
        return ast:expr_unop(op, v, line)
    end
    -- Only negation is available for algebra operands.
    assert(op == '-', 'not yet implemented')
    return ast:expr_algebra_unop(op, v, line)
end
177 | 
-- Parse binary expressions whose operators have a left priority higher
-- than `limit`.
-- Returns the expression and the (string form of the) first token that
-- was not consumed.
function expr_binop(ast, ls, limit)
    local lhs = expr_unop(ast, ls)
    local op = ls.token2str(ls.token)
    while true do
        if not (operator.is_binop(op) and operator.left_priority(op) > limit) then
            break
        end
        local line = ls.linenumber
        ls:next()
        -- The right operand takes every operator binding tighter than `op`.
        local rhs, following = expr_binop(ast, ls, operator.right_priority(op))
        if is_algebra(lhs) or is_algebra(rhs) then
            lhs = ast:expr_algebra_binop(op, lhs, rhs, line)
        else
            lhs = ast:expr_binop(op, lhs, rhs, line)
        end
        op = following
    end
    return lhs, op
end
195 | 
-- Parse a whole expression.
function expr(ast, ls)
    local no_limit = 0 -- Priority 0: parse whole expression.
    return expr_binop(ast, ls, no_limit)
end
199 | 
-- Parse primary expression: a prefix (a parenthesized expression or a
-- name) followed by any number of suffixes (field access, indexing,
-- method call, function call).
-- Returns the expression and its kind: 'expr', 'var', 'indexed' or 'call'.
function expr_primary(ast, ls)
    local v, vk
    -- Parse prefix expression.
    local first = ls.token
    if first == '(' then
        local line = ls.linenumber
        ls:next()
        vk, v = 'expr', ast:expr_brackets(expr(ast, ls))
        lex_match(ls, ')', '(', line)
    elseif first == 'TK_name' or (not LJ_52 and first == 'TK_goto') then
        vk, v = 'var', var_lookup(ast, ls)
    else
        err_syntax(ls, "unexpected symbol")
    end
    while true do -- Parse multiple expression suffixes.
        local line = ls.linenumber
        local tok = ls.token
        if tok == '.' then
            vk, v = 'indexed', expr_field(ast, ls, v)
        elseif tok == '[' then
            local key = expr_bracket(ast, ls, true)
            if key then
                vk, v = 'indexed', ast:expr_index(v, key)
            else
                -- Empty brackets: algebra index.
                vk, v = 'indexed', ast:expr_algebra_index(v)
            end
        elseif tok == ':' then
            ls:next()
            local key = lex_str(ls)
            local args = parse_args(ast, ls)
            vk, v = 'call', ast:expr_method_call(v, key, args, line)
        elseif tok == '(' or tok == 'TK_string' or tok == '{' then
            local args = parse_args(ast, ls)
            vk, v = 'call', ast:expr_function_call(v, args, line)
        else
            break
        end
    end
    return v, vk
end
239 | 
240 | -- Parse statements ----------------------------------------------------
241 | 
242 | 
-- Parse 'return' statement. The function state is flagged as having a
-- return; the list of returned expressions may be empty.
local function parse_return(ast, ls, line)
    ls:next() -- Skip 'return'.
    ls.fs.has_return = true
    local bare = EndOfBlock[ls.token] or ls.token == ';'
    local exps
    if bare then -- Base return.
        exps = { }
    else -- Return with one or more values.
        exps = expr_list(ast, ls)
    end
    return ast:return_stmt(exps, line)
end
255 | 
-- Parse numeric 'for', starting at the '=' that follows the loop variable
-- `varname`. A missing step is replaced by the literal 1.
local function parse_for_num(ast, ls, varname, line)
    lex_check(ls, '=')
    local init = expr(ast, ls)
    lex_check(ls, ',')
    local last = expr(ast, ls)
    local step
    if not lex_opt(ls, ',') then
        step = ast:literal(1)
    else
        step = expr(ast, ls)
    end
    lex_check(ls, 'TK_do')
    local body = parse_block(ast, ls, line)
    local var = ast:identifier(varname)
    local lastline = ls.linenumber
    return ast:for_stmt(var, init, last, step, body, line, lastline)
end
273 | 
-- Parse 'for' iterator, starting right after the first loop variable
-- `indexname`: remaining variables, 'in', the expression list and the body.
local function parse_for_iter(ast, ls, indexname)
    local vars = { }
    vars[1] = ast:identifier(indexname)
    while lex_opt(ls, ',') do
        local name = lex_str(ls)
        vars[#vars + 1] = ast:identifier(name)
    end
    lex_check(ls, 'TK_in')
    local line = ls.linenumber
    local exps = expr_list(ast, ls)
    lex_check(ls, 'TK_do')
    local body = parse_block(ast, ls, line)
    local lastline = ls.linenumber
    return ast:for_iter_stmt(vars, exps, body, line, lastline)
end
287 | 
-- Parse 'for' statement: the token after the first variable name selects
-- the numeric or the iterator form.
local function parse_for(ast, ls, line)
    ls:next()  -- Skip 'for'.
    local varname = lex_str(ls)  -- Get first variable name.
    local tok = ls.token
    local stmt
    if tok == '=' then
        stmt = parse_for_num(ast, ls, varname, line)
    elseif tok == ',' or tok == 'TK_in' then
        stmt = parse_for_iter(ast, ls, varname)
    else
        err_syntax(ls, "'=' or 'in' expected")
    end
    lex_match(ls, 'TK_end', 'TK_for', line)
    return stmt
end
303 | 
-- Parse a 'repeat' ... 'until' statement. The scope is opened before the
-- body and closed only after the condition has been parsed.
local function parse_repeat(ast, ls, line)
    ast:fscope_begin()
    ls:next() -- Skip 'repeat'.
    local body = parse_block(ast, ls)
    local lastline = ls.linenumber
    lex_match(ls, 'TK_until', 'TK_repeat', line)
    local cond = expr(ast, ls) -- Parse condition.
    ast:fscope_end()
    local stmt = ast:repeat_stmt(cond, body, line, lastline)
    return stmt
end
314 | 
-- Parse function argument list: a parenthesized expression list, a single
-- table constructor or a single string literal.
-- Returns the list of argument expressions.
function parse_args(ast, ls)
    local line = ls.linenumber
    local tok = ls.token
    if tok == '(' then
        if not LJ_52 and line ~= ls.lastline then
            err_syntax(ls, "ambiguous syntax (function call x new statement)")
        end
        ls:next()
        local args
        if ls.token == ')' then -- f().
            args = { }
        else
            args = expr_list(ast, ls)
        end
        lex_match(ls, ')', '(', line)
        return args
    elseif tok == '{' then
        local a = expr_table(ast, ls)
        return { a }
    elseif tok == 'TK_string' then
        local a = ls.tokenval
        ls:next()
        return { ast:literal(a) }
    end
    err_syntax(ls, "function arguments expected")
    return nil
end
342 | 
-- Parse an assignment. `var` (of kind `vk`) is appended to the list of
-- targets `vlist`; further targets are collected recursively, then the
-- right hand side is parsed. When any target is an algebra expression an
-- algebra assignment is built (a single target is required in that case).
local function parse_assignment(ast, ls, vlist, var, vk)
    local line = ls.linenumber
    checkcond(ls, vk == 'var' or vk == 'indexed', 'syntax error')
    vlist[#vlist+1] = var
    if lex_opt(ls, ',') then
        local n_var, n_vk = expr_primary(ast, ls)
        return parse_assignment(ast, ls, vlist, n_var, n_vk)
    end
    -- Parse RHS.
    lex_check(ls, '=')
    local exps = expr_list(ast, ls)
    local algebra = false
    for i = 1, #vlist do
        if is_algebra(vlist[i]) then
            algebra = true
            break
        end
    end
    if not algebra then
        return ast:assignment_expr(vlist, exps, line)
    end
    assert(#vlist == 1, 'not yet implemented')
    return ast:assignment_algebra_expr(vlist, exps, line)
end
365 | 
-- Parse a statement that starts with a primary expression: a call becomes
-- an expression statement, anything else must be an assignment.
local function parse_call_assign(ast, ls)
    local var, vk = expr_primary(ast, ls)
    if vk ~= 'call' then
        local vlist = { }
        return parse_assignment(ast, ls, vlist, var, vk)
    end
    return ast:new_statement_expr(var, ls.linenumber)
end
375 | 
-- Parse 'local' statement (the 'local' keyword is already consumed):
-- either a local function declaration or a local variable declaration
-- with an optional list of initializers.
local function parse_local(ast, ls)
    local line = ls.linenumber
    if lex_opt(ls, 'TK_function') then -- Local function declaration.
        local name = lex_str(ls)
        local args, body, proto = parse_body(ast, ls, line, false)
        return ast:local_function_decl(name, args, body, proto)
    end
    -- Local variable declaration.
    local names = { }
    names[1] = lex_str(ls)
    while lex_opt(ls, ',') do -- Collect LHS.
        names[#names + 1] = lex_str(ls)
    end
    local exps
    if lex_opt(ls, '=') then -- Optional RHS.
        exps = expr_list(ast, ls)
    else
        exps = { }
    end
    return ast:local_decl(names, exps, line)
end
397 | 
398 | -- Parse a 'function' statement: the (possibly dotted) name, an optional
399 | -- ':method' suffix that requests an implicit 'self', then the function body.
400 | local function parse_func(ast, ls, line)
401 |     ls:next() -- Skip 'function'.
402 |     local target = var_lookup(ast, ls)
403 |     while ls.token == '.' do -- Walk the dot-separated fields.
404 |         target = expr_field(ast, ls, target)
405 |     end
406 |     local is_method = ls.token == ':'
407 |     if is_method then -- The method name is parsed like any other field.
408 |         target = expr_field(ast, ls, target)
409 |     end
410 |     local args, body, proto = parse_body(ast, ls, line, is_method)
411 |     return ast:function_decl(target, args, body, proto)
412 | end
413 | 
414 | local function parse_while(ast, ls, line) -- 'while' <cond> 'do' <block> 'end'.
415 |     ls:next() -- Skip 'while'.
416 |     local test = expr(ast, ls)
417 |     ast:fscope_begin() -- Opened only after the condition has been parsed.
418 |     lex_check(ls, 'TK_do')
419 |     local block = parse_block(ast, ls)
420 |     local endline = ls.linenumber -- Sampled before the 'end' is consumed.
421 |     lex_match(ls, 'TK_end', 'TK_while', line)
422 |     ast:fscope_end()
423 |     return ast:while_stmt(test, block, line, endline)
424 | end
425 | 
426 | local function parse_then(ast, ls, tests, line) -- '<cond> then <block>' part of if/elseif.
427 |     ls:next() -- Skip the introducing keyword ('if' or 'elseif').
428 |     tests[#tests + 1] = expr(ast, ls) -- The condition is handed back through 'tests'.
429 |     lex_check(ls, 'TK_then')
430 |     return parse_block(ast, ls, line)
431 | end
432 | 
433 | -- Parse 'if ... then ... {elseif ... then ...} [else ...] end'.
434 | local function parse_if(ast, ls, line)
435 |     local tests, blocks, else_branch = { }, { }, nil
436 |     local branch_line = line -- Line on which the upcoming branch starts.
437 |     repeat -- One pass for the 'if' and one per 'elseif'.
438 |         blocks[#blocks+1] = parse_then(ast, ls, tests, branch_line)
439 |         branch_line = ls.linenumber
440 |     until ls.token ~= 'TK_elseif'
441 |     if ls.token == 'TK_else' then
442 |         ls:next() -- Skip 'else'.
443 |         else_branch = parse_block(ast, ls, branch_line)
444 |     end
445 |     lex_match(ls, 'TK_end', 'TK_if', line)
446 |     return ast:if_stmt(tests, blocks, else_branch, line)
447 | end
448 | 
449 | -- Parse a '::name::' label statement.
450 | -- A label that directly follows this one is deliberately left in the token
451 | -- stream, so that the statement loop parses it as a statement of its own.
452 | -- Consuming it here through a recursive call, whose result is not used,
453 | -- would drop its node and the label would be missing from the block body.
454 | local function parse_label(ast, ls)
455 |     ls:next() -- Skip '::'.
456 |     local name = lex_str(ls)
457 |     lex_check(ls, 'TK_label')
458 |     -- Skip trailing ';' (Lua 5.2 only). They are consumed here because the
459 |     -- label may be the last statement before the end of the block.
460 |     while LJ_52 and ls.token == ';' do
461 |         ls:next()
462 |     end
463 |     return ast:label_stmt(name, ls.linenumber)
464 | end
465 | 
466 | -- Parse the label name of a 'goto' (the keyword is skipped by the caller).
467 | local function parse_goto(ast, ls)
468 |     local line = ls.linenumber -- Must be sampled before the name is consumed.
469 |     return ast:goto_stmt(lex_str(ls), line)
470 | end
471 | 
472 | -- Parse a single statement. Returns the statement node and a boolean telling
473 | -- whether it must be the last statement of its block.
474 | -- Tokens not handled by a keyword rule (including 'goto' used as a plain name
475 | -- outside Lua 5.2 mode) end up in the call/assign rule.
476 | local function parse_stmt(ast, ls)
477 |     local line, tok = ls.linenumber, ls.token
478 |     local stmt
479 |     -- The rules that return directly come first; the others only set 'stmt'.
480 |     if tok == 'TK_return' then
481 |         return parse_return(ast, ls, line), true -- Must be last.
482 |     elseif tok == 'TK_break' then
483 |         ls:next()
484 |         return ast:break_stmt(line), not LJ_52 -- Must be last in Lua 5.1.
485 |     elseif LJ_52 and tok == ';' then
486 |         -- Empty statement: skip it and parse whatever follows.
487 |         ls:next()
488 |         return parse_stmt(ast, ls)
489 |     elseif tok == 'TK_if' then
490 |         stmt = parse_if(ast, ls, line)
491 |     elseif tok == 'TK_while' then
492 |         stmt = parse_while(ast, ls, line)
493 |     elseif tok == 'TK_do' then
494 |         ls:next() -- Skip 'do'.
495 |         local body = parse_block(ast, ls)
496 |         local lastline = ls.linenumber
497 |         lex_match(ls, 'TK_end', 'TK_do', line)
498 |         stmt = ast:do_stmt(body, line, lastline)
499 |     elseif tok == 'TK_for' then
500 |         stmt = parse_for(ast, ls, line)
501 |     elseif tok == 'TK_repeat' then
502 |         stmt = parse_repeat(ast, ls, line)
503 |     elseif tok == 'TK_function' then
504 |         stmt = parse_func(ast, ls, line)
505 |     elseif tok == 'TK_local' then
506 |         ls:next() -- Skip 'local'.
507 |         stmt = parse_local(ast, ls, line)
508 |     elseif tok == 'TK_label' then
509 |         stmt = parse_label(ast, ls)
510 |     elseif tok == 'TK_goto' and (LJ_52 or ls:lookahead() == 'TK_name') then
511 |         ls:next() -- Skip 'goto'.
512 |         stmt = parse_goto(ast, ls)
513 |     end
514 |     -- If here 'stmt' is "nil" then no rule above applied: fall back to the
515 |     -- call/assign rule.
516 |     if not stmt then
517 |         stmt = parse_call_assign(ast, ls)
518 |     end
519 |     return stmt, false
520 | end
521 | 
522 | -- Parse a parenthesized parameter list. 'self' is declared first when
523 | -- 'needself' is set; '...' flags the prototype as vararg and ends the list.
524 | local function parse_params(ast, ls, needself)
525 |     lex_check(ls, "(")
526 |     local params = { }
527 |     if needself then params[1] = ast:var_declare("self") end
528 |     local more = ls.token ~= ")"
529 |     while more do
530 |         local tok = ls.token
531 |         if tok == 'TK_dots' then
532 |             ls:next()
533 |             ls.fs.varargs = true
534 |             params[#params + 1] = ast:expr_vararg()
535 |             break -- Nothing may follow '...'.
536 |         elseif tok == 'TK_name' or (tok == 'TK_goto' and not LJ_52) then
537 |             params[#params + 1] = ast:var_declare((lex_str(ls)))
538 |         else
539 |             err_syntax(ls, "<name> or \"...\" expected")
540 |         end
541 |         more = lex_opt(ls, ',')
542 |     end
543 |     lex_check(ls, ")")
544 |     return params
545 | end
546 | 
547 | -- Create the per-function prototype record that is stored in 'ls.fs'.
548 | -- 'ls' is accepted but not used.
549 | local function new_proto(ls, varargs) return { varargs = varargs } end
550 | 
551 | -- Parse statements up to an end-of-block token or a must-be-last statement.
552 | local function parse_block_stmts(ast, ls)
553 |     local firstline, body = ls.linenumber, { }
554 |     local stmt, done -- 'done' is set once a statement that must be last is seen.
555 |     while not (done or EndOfBlock[ls.token]) do
556 |         stmt, done = parse_stmt(ast, ls)
557 |         body[#body + 1] = stmt
558 |         lex_opt(ls, ';')
559 |     end
560 |     return body, firstline, ls.linenumber
561 | end
562 | 
563 | local function parse_chunk(ast, ls) -- Top-level statements wrapped into a chunk node.
564 |     local stmts, _, lastline = parse_block_stmts(ast, ls)
565 |     return ast:chunk(stmts, ls.chunkname, 0, lastline) -- The first line is passed as 0.
566 | end
567 | 
568 | -- Parse the parameter list and the body of a function up to its closing 'end'.
569 | -- Returns the parameters, the body and the prototype record of the function.
570 | function parse_body(ast, ls, line, needself)
571 |     local outer, proto = ls.fs, new_proto(ls, false)
572 |     ls.fs = proto -- 'outer' is put back once the body has been parsed.
573 |     ast:fscope_begin()
574 |     proto.firstline = line
575 |     local args = parse_params(ast, ls, needself)
576 |     local body = parse_block(ast, ls)
577 |     ast:fscope_end()
578 |     if ls.token ~= 'TK_end' then
579 |         lex_match(ls, 'TK_end', 'TK_function', line)
580 |     end
581 |     proto.lastline = ls.linenumber
582 |     ls:next()
583 |     ls.fs = outer
584 |     return args, body, proto
585 | end
586 | 
587 | function parse_block(ast, ls, firstline) -- Parses a block inside its own scope.
588 |     ast:fscope_begin()
589 |     local stmts = parse_block_stmts(ast, ls)
590 |     stmts.firstline, stmts.lastline = firstline, ls.linenumber -- Span kept on the list itself.
591 |     ast:fscope_end()
592 |     return stmts
593 | end
594 | 
595 | -- Entry point: parse the whole token stream of 'ls' into a chunk node.
596 | local function parse(ast, ls)
597 |     ls:next() -- Advance to the first token.
598 |     ls.fs = new_proto(ls, true) -- The main chunk is created as vararg.
599 |     ast:fscope_begin()
600 |     local tree = parse_chunk(ast, ls)
601 |     ast:fscope_end()
602 |     -- Anything left over after the chunk is reported as an error.
603 |     if ls.token ~= 'TK_eof' then err_token(ls, 'TK_eof') end
604 |     return tree
605 | end
606 | 
607 | return parse
608 | 


--------------------------------------------------------------------------------
/reader.lua:
--------------------------------------------------------------------------------
 1 | local strsub = string.sub
 2 | 
 3 | -- Returns a function that hands out 'src' piecewise, then nil once exhausted.
 4 | local function new_string_reader(src)
 5 |     local offset = 1
 6 |     return function()
 7 |         local piece = strsub(src, offset, offset + 4096 - 32)
 8 |         offset = offset + #piece
 9 |         return piece ~= '' and piece or nil
10 |     end
11 | end
12 | 
13 | -- Chunked reader over 'filename' (standard input when nil). An opened file is
14 | -- closed as soon as it is exhausted; later calls keep returning nil.
15 | local function new_file_reader(filename)
16 |     local f = io.stdin
17 |     if filename then f = assert(io.open(filename, 'r'), "cannot open file " .. filename) end
18 |     return function()
19 |         if not f then return nil end -- Already exhausted and closed.
20 |         local chunk = f:read(4096 - 32)
21 |         if not chunk and filename then f:close(); f = nil end -- stdin is never closed.
22 |         return chunk
23 |     end
24 | end
25 | 
26 | return { -- Module exports: the reader constructors, keyed by the kind of input.
27 |     string = new_string_reader,
28 |     file = new_file_reader,
29 | }
30 | 


--------------------------------------------------------------------------------
/transform.lua:
--------------------------------------------------------------------------------
  1 | local lua_ast = require('sci-lang.lua-ast')
  2 | 
  3 | -- Append the non-nil extra arguments to 'body', in order.
  4 | local function add_body(body, ...)
  5 |   for i = 1, select('#', ...) do -- '#' on a { ... } table is unreliable with nil holes.
  6 |     body[#body + 1] = (select(i, ...)) -- Assigning nil appends nothing.
  7 |   end
  8 | end
  9 | 
 10 | -- Build the node for 'var._p[__i]'.
 11 | local function aexpr_index(ast, var, line)
 12 |   return ast:expr_index(ast:expr_property(var, '_p', line), ast:identifier('__i'), line)
 13 | end
 14 | 
 15 | -- Build 'for __i = 0, lhs._n - 1 do lhs._p[__i] = rhs end'; every node is
 16 | -- created with line number 1.
 17 | local function aexpr_loop1(ast, lhs, rhs)
 18 |   local index = ast:identifier('__i', 1)
 19 |   local first = ast:literal(0, 1)
 20 |   local last = ast:expr_binop('-', ast:expr_property(lhs, '_n', 1), ast:literal(1, 1), 1)
 21 |   local target = { aexpr_index(ast, lhs, 1) }
 22 |   return ast:for_stmt(index, first, last, nil, { ast:assignment_expr(target, { rhs }, 1) }, 1, 1)
 23 | end
 24 | 
 25 | local function aexpr_dim(ast, what, arrays) -- Builds the call '__dim_<what>(...)' with 'arrays' as its argument list.
 26 |   return ast:expr_function_call(ast:identifier('__dim_'..what, 1), arrays, 1)
 27 | end
 28 | 
 29 | -- Bind 'node' to a fresh '__x<n>' parameter: the parameter is stored in
 30 | -- 'fargs', the value passed for it in 'fvals'. Returns the parameter.
 31 | local function aexpr_terminal(ast, node, fargs, fvals)
 32 |   assert(#fargs == #fvals)
 33 |   local kind, slot = node.kind, #fargs + 1
 34 |   local param = ast:identifier('__x'..slot, 1)
 35 |   fargs[slot] = param
 36 |   if kind == 'Identifier' or kind == 'Literal' then fvals[slot] = node end
 37 |   if kind == 'IndexAlgebraExpression' then fvals[slot] = node.object end
 38 |   return param
 39 | end
 40 | 
 41 | local aexpr_set -- Forward declaration: assigned further below, referenced by the helpers in between.
 42 | 
 43 | -- Translate an element-wise expression into one that reads element '__i' of
 44 | -- each array operand (collected in 'arrays'); '**' and '^^' go to a temporary.
 45 | local function aexpr_linear_access(ast, node, fbody, fargs, fvals, temps, arrays)
 46 |   assert(type(temps) == 'table')
 47 |   local kind, op = node.kind, node.operator
 48 |   if kind == 'Identifier' or kind == 'Literal' then
 49 |     return aexpr_terminal(ast, node, fargs, fvals)
 50 |   elseif kind == 'IndexAlgebraExpression' then
 51 |     local param = aexpr_terminal(ast, node, fargs, fvals)
 52 |     arrays[#arrays + 1] = param
 53 |     return aexpr_index(ast, param, 1)
 54 |   elseif kind == 'UnaryAlgebraExpression' then
 55 |     return ast:expr_unop(op, aexpr_linear_access(ast, node.argument, fbody, fargs, fvals, temps, arrays), node.line)
 56 |   elseif kind == 'BinaryAlgebraExpression' and op ~= '**' and op ~= '^^' then
 57 |     local left  = aexpr_linear_access(ast, node.left,  fbody, fargs, fvals, temps, arrays)
 58 |     local right = aexpr_linear_access(ast, node.right, fbody, fargs, fvals, temps, arrays)
 59 |     return ast:expr_binop(op, left, right, node.line)
 60 |   elseif kind == 'BinaryAlgebraExpression' then -- '**' or '^^'.
 61 |     local tmp = ast:identifier('__t'..(#temps + 1), 1)
 62 |     temps[#temps + 1] = tmp
 63 |     arrays[#arrays + 1] = tmp
 64 |     aexpr_set(ast, node, tmp, ast:identifier('__stack_array', 1), fbody, fargs, fvals, temps)
 65 |     return aexpr_index(ast, tmp, 1)
 66 |   end
 67 |   error('internal: unreachable')
 68 | end
 69 | 
 70 | -- Emit the element-wise evaluation of 'node' into 'out': returns the setup
 71 | -- statement and the loop storing every element.
 72 | local function aexpr_elw_set(ast, node, out, out_kind, fbody, fargs, fvals, temps)
 73 |   local arrays = { }
 74 |   local access = aexpr_linear_access(ast, node, fbody, fargs, fvals, temps, arrays)
 75 |   local pre
 76 |   if not out_kind then -- 'out' is passed to the '__dim_elw_<n>' call as well.
 77 |     pre = ast:new_statement_expr(aexpr_dim(ast, 'elw_'..(#arrays + 1), { out, unpack(arrays) }), 1)
 78 |   else -- 'out' is declared from 'out_kind(arrays[1], <dim call>)'.
 79 |     local dim = aexpr_dim(ast, 'elw_'..(#arrays), arrays)
 80 |     pre = ast:local_decl({ out.name }, { ast:expr_function_call(out_kind, { arrays[1], dim }, 1) }, 1)
 81 |   end
 82 |   return pre, (aexpr_loop1(ast, out, access))
 83 | end
 84 | 
 85 | -- Reduce an operand of '**' / '^^' to a single variable. A leading '`' is
 86 | -- stripped and reported as a boolean literal; operands that are not
 87 | -- terminals are first evaluated into a new '__t<n>' temporary.
 88 | -- Returns the variable and the transpose literal.
 89 | local function aexpr_singlify(ast, node, fbody, fargs, fvals, temps)
 90 |   local transposed = node.kind == 'UnaryAlgebraExpression' and node.operator == '`'
 91 |   if transposed then node = node.argument end
 92 |   local kind = node.kind
 93 |   local flag = ast:literal(transposed, 1)
 94 |   if kind == 'IndexAlgebraExpression' or kind == 'Identifier' or kind == 'Literal' then
 95 |     return aexpr_terminal(ast, node, fargs, fvals), flag
 96 |   end
 97 |   local tmp = ast:identifier('__t'..(#temps + 1), 1)
 98 |   temps[#temps + 1] = tmp
 99 |   aexpr_set(ast, node, tmp, ast:identifier('__stack_array', 1), fbody, fargs, fvals, temps)
100 |   return tmp, flag
101 | end
102 | 
103 | -- Emit 'out = left ** right': returns the setup statement and the '__mul' call.
104 | local function aexpr_mul_set(ast, out, out_kind, left, right, left_tr, right_tr)
105 |   local callee = ast:identifier('__mul', 1)
106 |   local pre
107 |   if not out_kind then
108 |     pre = ast:new_statement_expr(aexpr_dim(ast, 'mul_3', { out, left, right, left_tr, right_tr }), 1)
109 |   else
110 |     local dim = aexpr_dim(ast, 'mul_2', { left, right, left_tr, right_tr })
111 |     pre = ast:local_decl({ out.name }, { ast:expr_function_call(out_kind, { left, dim }, 1) }, 1)
112 |   end
113 |   local call = ast:expr_function_call(callee, { out, left, right, left_tr, right_tr }, 1)
114 |   return pre, (ast:new_statement_expr(call, 1))
115 | end
116 | 
117 | -- Emit 'out = left ^^ right': returns the setup statement and the '__pow' call.
118 | local function aexpr_pow_set(ast, out, out_kind, left, right)
119 |   local callee = ast:identifier('__pow', 1)
120 |   local pre
121 |   if not out_kind then
122 |     pre = ast:new_statement_expr(aexpr_dim(ast, 'pow_2', { out, left }), 1)
123 |   else
124 |     local dim = aexpr_dim(ast, 'pow_1', { left })
125 |     pre = ast:local_decl({ out.name }, { ast:expr_function_call(out_kind, { left, dim }, 1) }, 1)
126 |   end
127 |   local call = ast:expr_function_call(callee, { out, left, right }, 1)
128 |   return pre, (ast:new_statement_expr(call, 1))
129 | end
130 | 
131 | -- Append to 'fbody' the statements that evaluate 'node' into 'out'.
132 | aexpr_set = function(ast, node, out, out_kind, fbody, fargs, fvals, temps)
133 |   local op = node.operator
134 |   if node.kind ~= 'BinaryAlgebraExpression' or (op ~= '**' and op ~= '^^') then
135 |     return add_body(fbody, aexpr_elw_set(ast, node, out, out_kind, fbody, fargs, fvals, temps))
136 |   end
137 |   local left,  left_tr  = aexpr_singlify(ast, node.left,  fbody, fargs, fvals, temps)
138 |   local right, right_tr = aexpr_singlify(ast, node.right, fbody, fargs, fvals, temps)
139 |   if op == '**' then
140 |     add_body(fbody, aexpr_mul_set(ast, out, out_kind, left, right, left_tr, right_tr))
141 |   else -- '^^': the transpose literals are not passed on.
142 |     add_body(fbody, aexpr_pow_set(ast, out, out_kind, left, right))
143 |   end
144 | end
145 | 
146 | local expr_count = 0 -- Module-wide counter used to number the generated '__aexpr_<n>' functions.
147 | local proto = { firstline = 1, lastline = 1 } -- Prototype record passed for every generated function.
148 | 
149 | local function aexpr_clear(ast, temps, fbody) -- Appends a '__stack_clear()' call if temporaries were used.
150 |   if #temps == 0 then return end
151 |   local call = ast:expr_function_call(ast:identifier('__stack_clear', 1), { }, 1)
152 |   add_body(fbody, ast:new_statement_expr(call, 1))
153 | end
154 | 
155 | -- Wrap the evaluation of 'set_node' into a generated local function queued in
156 | -- 'ast.pre'; returns the call to it ('fargs' parameters, 'fvals' arguments).
157 | local function aexpr_root(ast, fargs, fvals, set_node, out_kind, return_stmt)
158 |   expr_count = expr_count + 1
159 |   local result = ast:identifier('__r1', 1)
160 |   local fbody, temps = { }, { }
161 |   aexpr_set(ast, set_node, result, out_kind, fbody, fargs, fvals, temps)
162 |   aexpr_clear(ast, temps, fbody)
163 |   add_body(fbody, return_stmt) -- Appends nothing when 'return_stmt' is nil.
164 |   fbody.lastline = 1
165 |   local fname = '__aexpr_'..expr_count
166 |   ast.pre[#ast.pre + 1] = ast:local_function_decl(fname, fargs, fbody, proto)
167 |   return ast:expr_function_call(ast:identifier(fname, 1), fvals, 1)
168 | end
169 | 
170 | local function aexpr_new(ast, node) -- Value context: the generated function returns '__r1'.
171 |   local result = ast:identifier('__r1', 1)
172 |   return aexpr_root(ast, { }, { }, node, ast:identifier('__array_alloc', 1), ast:return_stmt({ result }, 1))
173 | end
174 | 
175 | local function aexpr_assign(ast, node) -- Assignment: the target object is passed in as '__r1'.
176 |   local target = node.left[1].object
177 |   return aexpr_root(ast, { ast:identifier('__r1', 1) }, { target }, node.right[1], nil, nil)
178 | end
179 | 
180 | local transform_map = { -- Node kind -> function that builds the replacement for such a node.
181 |   IndexAlgebraExpression  = aexpr_new,
182 |   UnaryAlgebraExpression  = aexpr_new,
183 |   BinaryAlgebraExpression = aexpr_new,
184 |   AssignmentAlgebraExpression = aexpr_assign, -- Evaluates into the object of the first target.
185 | }
186 | 
187 | -- Return the transformed counterpart of 'node'. Algebra nodes are replaced by
188 | -- whatever their handler in 'transform_map' builds, any other table is copied
189 | -- field by field with each field transformed in turn, and non-table values
190 | -- are returned as they are.
191 | local function transform(ast, node)
192 |   if type(node) ~= 'table' then return node end
193 |   local handler = transform_map[node.kind]
194 |   if handler then return handler(ast, node) end
195 |   -- No handler for this table: rebuild it from its transformed fields.
196 |   local copy = { }
197 |   for key, value in pairs(node) do
198 |     copy[key] = transform(ast, value)
199 |   end
200 |   return copy
201 | end
202 | 
203 | -- Build 'local __a, __b, ... = from.a, from.b, ...' for the names in 'what'.
204 | local function localize(ast, what, from, line)
205 |   local names, values = { }, { }
206 |   for i, field in ipairs(what) do
207 |     names[i], values[i] = '__'..field, ast:expr_property(from, field, line)
208 |   end
209 |   return ast:local_decl(names, values, line)
210 | end
211 | 
212 | -- Build the prologue statements: 'local __alg = require("sci.alg").__' followed
213 | -- by local aliases '__<name>' for the fields of '__alg' used by generated code.
214 | local function pre_init(ast)
215 |   local alg = ast:identifier('__alg', 1)
216 |   local runtime = ast:expr_function_call(ast:identifier('require', 1), { ast:literal('sci.alg', 1) }, 1)
217 |   local helpers = { 'mul', 'pow', 'dim_mul_2', 'dim_mul_3', 'dim_pow_1', 'dim_pow_2', 'stack_array', 'stack_clear', 'array_alloc' }
218 |   local elw_dims = { }
219 |   for n = 1, 10 do -- 'dim_elw_1' .. 'dim_elw_10'.
220 |     elw_dims[n] = 'dim_elw_'..n
221 |   end
222 |   return {
223 |     ast:local_decl({ alg.name }, { ast:expr_property(runtime, '__', 1) }, 1),
224 |     localize(ast, helpers, alg, 1),
225 |     localize(ast, elw_dims, alg, 1)
226 |   }
227 | end
228 | 
229 | -- Transform 'tree'; the statements queued in 'pre' are put first in its body.
230 | local function root(tree)
231 |   local tast = lua_ast.New()
232 |   tast:fscope_begin()
233 |   tast.pre = pre_init(tast)
234 |   local result = transform(tast, tree)
235 |   local body, pre = result.body, tast.pre
236 |   for at = 1, #pre do table.insert(body, at, pre[at]) end -- Keeps the queue order.
237 |   tast:fscope_end()
238 |   return result
239 | end
240 | 
241 | return {
242 |   root = root,
243 | }
244 | 
245 | 


--------------------------------------------------------------------------------
/util.lua:
--------------------------------------------------------------------------------
 1 | local exports = { }
 2 | 
 3 | -- Render 'node' as indented, JSON-like text (debugging aid).
 4 | --   node   any value; tables are rendered recursively
 5 | --   level  nesting depth used for the indentation, defaults to 1
 6 | -- A table whose length '#' is 0 and that holds at least one key is printed
 7 | -- as a '{ key: value }' object, any other table as a '[ ... ]' list; nil
 8 | -- becomes "null". Numbers are returned as they are, every other result is a
 9 | -- string. Table keys are rendered with dump() as well.
10 | -- Strings are quoted but not escaped. Cyclic tables are not supported.
11 | local function dump(node, level)
12 |    level = level or 1
13 |    local kind = type(node)
14 |    if kind == 'nil' then
15 |       return "null"
16 |    elseif kind == "string" then
17 |       return '"'..node..'"'
18 |    elseif kind == "number" then
19 |       return node
20 |    elseif kind ~= "table" then
21 |       -- Booleans, functions and cdata, but also userdata and threads: the
22 |       -- length operator applied below would raise an error on those.
23 |       return tostring(node)
24 |    end
25 |    -- From here on 'node' is a table.
26 |    local buff = { }
27 |    local dent = string.rep("    ", level)
28 |    -- The object form is chosen only when '#node' is 0 and some key exists.
29 |    local is_object = #node == 0 and next(node) ~= nil
30 |    buff[1] = is_object and "{" or "["
31 |    -- Entries are visited in pairs() order.
32 |    for k, data in pairs(node) do
33 |       local item = dump(data, level + 1)
34 |       if is_object then
35 |          item = dump(k)..': '..item
36 |       end
37 |       buff[#buff + 1] = "\n"..dent..item
38 |       -- A separator is due whenever another entry follows. Comparing the
39 |       -- key with '#node' instead leaves a dangling comma in lists that
40 |       -- also hold non-sequence keys.
41 |       if next(node, k) ~= nil then
42 |          buff[#buff + 1] = ","
43 |       end
44 |    end
45 |    local closing = is_object and "}" or "]"
46 |    buff[#buff + 1] = "\n"..string.rep("    ", level - 1)..closing
47 |    return table.concat(buff, "")
48 | end
49 | 
50 | exports.dump = dump
51 | 
52 | return exports
53 | 


--------------------------------------------------------------------------------