├── .github
└── FUNDING.yml
├── LICENSE
├── README.md
├── base
├── ast.lua
├── datareader.lua
├── parser.lua
└── tokenizer.lua
├── distinfo
├── grammar
├── parser.lua
└── tokenizer.lua
├── load_xform.lua
├── lua
├── ast.lua
├── parser.lua
└── tokenizer.lua
├── parser.lua
├── parser.rockspec
├── syntax_5.0.txt
├── syntax_5.1.txt
├── syntax_5.2.txt
├── syntax_5.3.txt
├── syntax_5.4.txt
├── syntax_ast_5.1.txt
├── syntax_grammar.txt
└── tests
├── flatten.lua
├── lua_to_c.lua
├── lua_to_c_test.lua
├── parse.lua
├── parsemyself.lua
├── spantest.lua
├── strings.lua
├── validate-key.txt
└── validate.lua
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: thenumbernine # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
12 | polar: # Replace with a single Polar username
13 | buy_me_a_coffee: thenumbernine # Replace with a single Buy Me a Coffee username
14 | thanks_dev: # Replace with a single thanks.dev username
15 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
16 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2017-2025 Christopher E. Moore ( christopher.e.moore@gmail.com / http://thenumbernine.github.io )
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | this software and associated documentation files (the "Software"), to deal in
7 | the Software without restriction, including without limitation the rights to
8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9 | the Software, and to permit persons to whom the Software is furnished to do so,
10 | subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://buy.stripe.com/00gbJZ0OdcNs9zi288)
2 |
3 | # Lua Parser in Lua
4 |
5 | Parses to an abstract syntax tree representation.
6 | Call tostring() on the AST to get equivalent Lua code.
7 |
8 | Works for versions ~~5.1 5.2~~ 5.3 5.4 and LuaJIT. I broke <=5.2 compatibility when I resorted to throwing objects for parse error reporting.
9 |
10 | AST also contains some functions like flatten() for use with optimizing / auto-inlining Lua.
11 |
12 | See the tests folder for example usage.
13 |
14 | ### Reference
15 |
16 | `Parser = require 'parser'`
17 | This will return the parser class.
18 |
19 | `result, msg = Parser.parse(data[, source, version, useluajit])`
20 | This parses the code in `data` and returns an `ast._block` object.
21 | This is shorthand for `Parser(data, source, version, useluajit).tree`
22 | `version` is a string `'5.3', '5.4'`, etc., corresponding to your Lua version.
23 | The `Parser` object has a few more functions to it corresponding with internal use while parsing.
24 | `source` is a description of the source, i.e. filename, which is included in some nodes (functions) for information on where they are declared.
25 | Returns `result` in case of success. If it encounters a parse error returns `false` and `msg` as what went wrong.
26 |
27 | `ast = require 'parser.lua.ast'`
28 | This is the AST (abstract syntax tree) library,
29 | it holds a collection of AST classes, each representing a different construct in the Lua syntax.
30 |
31 |
32 | `n = ast.node()`
33 | = This is the superclass of all AST classes.
34 |
35 | Each has the following properties:
36 |
37 | `n.type` = returns the type of the node, coinciding with the classname in the `ast` library with underscore removed.
38 |
39 | `n.span` = source code span information (`from` and `to` subtables each with `source`, `line` and `col` fields)
40 |
41 | `n:copy()` = returns a copy of the node.
42 |
43 | `n:flatten(func, varmap)` = flattens / inlines the contents of all function call of this function. Used for performance optimizations.
44 |
45 | `n:toLua()` = generate Lua code. same as the node's `__tostring`.
46 |
47 | `n:serialize(apply)` = apply a to-string serialization function to the AST.
48 |
49 | ## ast.node subclasses:
50 |
51 | `n = ast._block(...)` = a block of code in Lua.
52 | `...` is a list of initial child `stmt` nodes to populate the `block` node with.
53 | `n.type == 'block'`.
54 | `n[1] ... n[#n] =` nodes of statements within the block.
55 |
56 | `n = ast._stmt()` = a statement-node parent-class.
57 |
58 | `n = ast._assign(vars, exprs)` =
59 | An assignment operation.
60 | Subclass of `_stmt`.
61 | `n.type == 'assign'`.
62 | Represents the assignment of `n.vars` to `n.exprs`.
63 |
64 | `n = ast._do(...)` =
65 | A `do ... end` block.
66 | Subclass of `_stmt`.
67 | `n.type == 'do'`.
68 | `n[1] ... n[#n] =` nodes of statements within the block.
69 |
70 | `n = ast._while(cond, ...)` =
71 | A `while cond do ... end` block.
72 | Subclass of `_stmt`.
73 | `n.type == 'while'`.
74 | `n.cond` holds the condition expression.
75 | `n[1] ... n[#n] =` nodes of statements within the block.
76 |
77 | `n = ast._repeat(cond, ...)` =
78 | A `repeat ... until cond` block.
79 | Subclass of `_stmt`.
80 | `n.type == 'repeat'`.
81 | `n.cond` holds the condition expression.
82 | `n[1] ... n[#n] =` nodes of statements within the block.
83 |
84 | `n = ast._if(cond, ...)` =
85 | A `if cond then ... elseif ... else ... end` block.
86 | Subclass of `_stmt`.
87 | `n.type == 'if'`.
88 | `n.cond` holds the condition expression of the first `if` statement.
89 | All subsequent arguments must be `ast._elseif` objects, optionally with a final `ast._else` object.
90 | `n.elseifs` holds the `ast._elseif` objects.
91 | `n.elsestmt` optionally holds the final `ast._else`.
92 |
93 | `n = ast._elseif(cond, ...)` =
94 | A `elseif cond then ...` block.
95 | Subclass of `_stmt`.
96 | `n.type == 'elseif'`.
97 | `n.cond` holds the condition expression of the `elseif` statement.
98 | `n[1] ... n[#n] =` nodes of statements within the block.
99 |
100 | `n = ast._else(...)` =
101 | A `else ...` block.
102 | `n.type == 'else'`.
103 | `n[1] ... n[#n] =` nodes of statements within the block.
104 |
105 | `n = ast._foreq(var, min, max, step, ...)` =
106 | A `for var=min,max[,step] do ... end` block.
107 | Subclass of `_stmt`.
108 | `n.type == 'foreq'`.
109 | `n.var =` the variable node.
110 | `n.min =` the min expression.
111 | `n.max =` the max expression.
112 | `n.step =` the optional step expression.
113 | `n[1] ... n[#n] =` nodes of statements within the block.
114 |
115 | `n = ast._forin(vars, iterexprs, ...)`
116 | A `for var1,...varN in expr1,...exprN do ... end` block.
117 | Subclass of `_stmt`.
118 | `n.type == 'forin'`.
119 | `n.vars = ` table of variables of the for-in loop.
120 | `n.iterexprs = ` table of iterator expressions of the for-in loop.
121 | `n[1] ... n[#n] =` nodes of statements within the block.
122 |
123 | `n = ast._function(name, args, ...)`
124 | A `function [name](arg1, ...argN) ... end` block.
125 | Subclass of `_stmt`.
126 | `n.type == 'function'`.
127 | `n.name = ` the function name. This is optional. Omit name for this to represent lambda function. (Which technically becomes an expression and not a statement...)
128 | `n.args = ` table of arguments. This does get modified: each argument gets assigned an `.param = true`, and an `.index =` for which index it is in the argument list.
129 | `n[1] ... n[#n] =` nodes of statements within the block.
130 |
131 | `n = ast._local(exprs)`
132 | A `local ...` statement.
133 | Subclass of `_stmt`.
134 | `n.type == 'local'`
135 | `n.exprs =` list of expressions to be declared as locals.
136 | Expects its member-expressions to be either functions or assigns.
137 |
138 | `n = ast._return(...)`
139 | A `return ...` statement.
140 | Subclass of `_stmt`.
141 | `n.type == 'return'`
142 | `n.exprs =` list of expressions to return.
143 |
144 | `n = ast._break(...)`
145 | A `break` statement.
146 | Subclass of `_stmt`.
147 | `n.type == 'break'`
148 |
149 | `n = ast._call(func, ...)`
150 | A `func(...)` function-call expression.
151 | `n.type == 'call'`
152 | `n.func =` expression of the function to call.
153 | `n.args =` list of argument expressions to pass into the function-call.
154 |
155 | `n = ast._nil()`
156 | A `nil` literal expression.
157 | `n.type == 'nil'`.
158 | `n.const == true`.
159 |
160 | `n = ast._boolean()`
161 | The parent class of the `true`/`false` AST nodes.
162 |
163 | `n = ast._true()`
164 | A `true` boolean literal expression
165 | `n.type == 'true'`.
166 | `n.const == true`.
167 | `n.value == true`.
168 | `ast._boolean:isa(n)` evaluates to `true`
169 |
170 | `n = ast._false()`
171 | A `false` boolean literal expression
172 | `n.type == 'false'`.
173 | `n.const == true`.
174 | `n.value == false`.
175 | `ast._boolean:isa(n)` evaluates to `true`
176 |
177 | `n = ast._number(value)`
178 | A numeric literal expression.
179 | `n.type == 'number'`.
180 | `n.value =` the numerical value.
181 |
182 | `n = ast._string(value)`
183 | A string literal expression.
184 | `n.type == 'string'`.
185 | `n.value =` the string value.
186 |
187 | `n = ast._vararg()`
188 | A vararg `...` expression.
189 | `n.type == 'vararg'`.
190 | For use within function arguments, assignment expressions, function calls, etc.
191 |
192 | `n = ast._table(...)`
193 | A table `{ ... }` expression.
194 | `n.type == 'table'`.
195 | `n[1] ... n[#n] =` expressions of the table.
196 | If the expression in `n[i]` is an `ast._assign` then an entry is added into the table as `key = value`. If it is not an `ast._assign` then it is inserted as a sequenced entry.
197 |
198 | `n = ast._var(name)`
199 | A variable reference expression.
200 | `n.type == 'var'`
201 | `n.name =` the variable name.
202 |
203 | `n = ast._par(expr)`
204 | A `( ... )` parenthesis expression.
205 | `n.type == 'par'`.
206 | `n.expr =` the expression within the parenthesis.
207 |
208 | `n = ast._index(expr, key)`
209 | An `expr[key]` expression, i.e. an `__index`-metatable operation.
210 | `n.type == 'index'`.
211 | `n.expr =` the expression to be indexed.
212 | `n.key =` the expression of the index key.
213 |
214 | `n = ast._indexself(expr, key)`
215 | An `expr:key` expression, to be used as the expression of an `ast._call` node for member-function-calls. These are Lua's shorthand insertion of `self` as the first argument.
216 | `n.type == 'indexself'`.
217 | `n.expr =` the expression to be indexed.
218 | `n.key =` the key to index. Must only be a Lua string, (not an `ast._string`, but a real Lua string).
219 |
220 | Binary operations:
221 |
222 | |node type|Lua operator| |
223 | |---------|------------|------|
224 | |`_add` |`+` | |
225 | |`_sub` |`-` | |
226 | |`_mul` |`*` | |
227 | |`_div` |`/` | |
228 | |`_mod` |`%` | |
229 | |`_concat`|`..` | |
230 | |`_lt` |`<` | |
231 | |`_le` |`<=` | |
232 | |`_gt` |`>` | |
233 | |`_ge` |`>=` | |
234 | |`_eq` |`==` | |
235 | |`_ne` |`~=` | |
236 | |`_and` |`and` | |
237 | |`_or` |`or` | |
238 | |`_idiv` |`//` | 5.3+ |
239 | |`_band` |`&` | 5.3+ |
240 | |`_bxor` |`~` | 5.3+ |
241 | |`_bor` |`\|` | 5.3+ |
242 | |`_shl` |`<<` | 5.3+ |
243 | |`_shr` |`>>` | 5.3+ |
244 |
245 | `n[1] ... n[#n] =` a table of the arguments of the operation.
246 |
247 | Unary operations:
248 |
249 | |node type|Lua operator| |
250 | |---------|------------|------|
251 | |`_unm` |`-` | |
252 | |`_not` |`not` | |
253 | |`_len` |`#` | |
254 | |`_bnot` |`~` | 5.3+ |
255 |
256 | `n[1] =` the single argument of the operation.
257 |
258 | ## more extra functions:
259 |
260 | Some more useful functions in AST:
261 | - `ast.copy(node)` = equivalent of `node:copy()`
262 | - `ast.flatten(node, func, varmap)` = equivalent of `node:flatten(func, varmap)`
263 | - `ast.refreshparents`
264 | - `ast.traverse`
265 | - `ast.nodeclass(type, parent, args)`
266 | - `ast.tostringmethod` = this specifies the serialization method. It is used to look up the serializer stored in `ast.tostringmethods`
267 | - `parser.load_xform` works with my `ext.load` shim load layer to allow you to modify the AST of all subsequent loaded Lua code.
268 |
269 |
270 | ### TODO:
271 |
272 | - Option for parsing LuaJIT -i number suffixes.
273 | - Speaking of LuaJIT, it has different edge case syntax for 2.0.5, 2.1.0, and whether 5.2-compat is enabled or not. It isn't passing the `validate.lua`.
274 | - How about flags to turn off and on each feature, then a function for auto-detect flag sets based on Lua VERSION string or by running some local `load()` tests
275 | - Make all node allocation routed through `Parser:node` to give the node a .parser field to point back to the parser - necessary for certain AST nodes that need to tell what parser keywords are allowed. I do this where necessary but I should do it always.
276 | - I've also made this keyword test optional since in some rare projects (`vec-lua` for one) I am inserting AST nodes for the sake of a portable AST that I can inject as inline'd code, but without a parser, so I don't have a proper enumeration of keywords. So for now I'm making ast node `.parser` optional and the keyword test bypassed if `.parser` isn't present. I'll probably make it a hard constraint later when I rework `vec-lua`.
277 | - It seems like a quick fix to just convert all `a.b`s into `a['b']`s ... but Lua for some reason doesn't support `a['b']:c()` as an equivalent of `a.b:c()` ... so converting everything from dot to brack index could break some regenerated Lua scripts.
278 | - To preserve spacing and comments (useful for my [`langfix`](https://github.com/thenumbernine/langfix-lua) transpiler), instead of using ast fields which are tokens, I should use token-references as fields and allow them to be replaced ... maybe ...
279 | - I'm very tempted to switch the AST index names to remove the preceding underscore. Pro of keeping it: the keywords become valid Lua names. Pro of removing it: the AST index matches the keyword that the AST node represents ...
280 |
281 | ### Dependencies:
282 |
283 | - https://github.com/thenumbernine/lua-ext
284 | - https://github.com/thenumbernine/lua-template
285 |
286 | `validate-key.txt` originally taken from `minify_tests.txt` at https://github.com/stravant/LuaMinify
287 |
--------------------------------------------------------------------------------
/base/ast.lua:
--------------------------------------------------------------------------------
local table = require 'ext.table'
local string = require 'ext.string'
local class = require 'ext.class'

-- Root class for every AST node type.
local BaseAST = class()

-- this is too relaxed, since concat maps to tostring maps to toLua, and I want toLua only called from external, and toLua_recursive from internal
--BaseAST.__concat = string.concat

-- Attach source-span info (a {from=..., to=...} table) to this node.
-- Returns self so the call can be chained.
function BaseAST:setspan(span)
	self.span = span
	return self
end

-- Walk the .parent chain upward and collect every node along the way.
-- The returned list starts with self and ends at the root.
function BaseAST:ancestors()
	local chain = table()
	local node = self
	while node do
		chain:insert(node)
		node = node.parent
	end
	return chain
end

-- TODO move traverse flatten etc here once the fields problem is sorted out

return BaseAST
29 |
--------------------------------------------------------------------------------
/base/datareader.lua:
--------------------------------------------------------------------------------
--[[
TODO
store all tokens(term?) as we go in tokenhistory
then have Tokenizer keep track of the range in this array / forward it to be used as the span in AST
then the AST can look into this array, (maybe also keep track of which tokens are whitespace/comments)
... and reproduce the original file exactly as-is (if so desired).

TODO make sure *all* tokens are correctly stored in tokenhistory. right now it doesn't reproduce source in 100% of cases. maybe just 99%.

TODO terminology ...
DataReader gets chars as input, turns them into ... collections-of-chars?
Tokenizer gets collections-of-chars as input, turns them into tokens
Parser gets tokens as input, turns them into AST nodes
--]]
local table = require 'ext.table'
local class = require 'ext.class'
local assert = require 'ext.assert'

-- Lowest layer of the parsing stack: a pattern-driven character reader with
-- line/column tracking and an optional history of everything consumed.
local DataReader = class()

-- At the moment this is 100% cosmetic.
-- In case someone doesn't want tracking all tokens done for whatever reason (slowdown, memory, etc)
-- enable/disable this to make token-tracking optional
DataReader.tracktokens = true

-- data: the full source text as a string; reading starts at byte 1.
function DataReader:init(data)
	self.data = data
	self.index = 1

	-- keep track of all tokens as we parse them.
	self.tokenhistory = table()

	-- TODO this isn't robust against different OS file formats. maybe switching back to determining line number offline / upon error encounter is better than trying to track it while we parse.
	self.line = 1
	self.col = 1
end

-- True once the read index has advanced past the final byte.
function DataReader:done()
	return self.index > #self.data
end

local slashNByte = ('\n'):byte()
-- Advance self.line/self.col over the bytes consumed since the previous call.
-- Scans from the last stopping point up to self.index so each byte is only
-- counted once; a '\n' byte resets col and bumps line (only LF is recognized
-- -- see the TODO in init about other line-ending formats).
function DataReader:updatelinecol()
	if not self.lastUpdateLineColIndex then
		self.lastUpdateLineColIndex = 1
	else
		-- the reader is expected to only move forward
		assert.ge(self.index, self.lastUpdateLineColIndex)
	end
	for i=self.lastUpdateLineColIndex,self.index do
		if self.data:byte(i,i) == slashNByte then
			self.col = 1
			self.line = self.line + 1
		else
			self.col = self.col + 1
		end
	end
	self.lastUpdateLineColIndex = self.index+1
end

-- Record 'lasttoken' (and any text skipped to reach it) and return it.
-- When tracktokens is enabled, both the skipped text and the token are
-- appended to tokenhistory so that, ideally, concatenating the history
-- reproduces the source consumed so far (see the paranoid debug check below,
-- and the file-header TODO about the cases where this isn't exact yet).
function DataReader:setlasttoken(lasttoken, skipped)
	self.lasttoken = lasttoken
	if self.tracktokens then
		if skipped and #skipped > 0 then
			--DEBUG(@5): print('SKIPPED', require 'ext.tolua'(skipped))
			self.tokenhistory:insert(skipped)
		end
		--DEBUG(@5): print('TOKEN', require 'ext.tolua'(self.lasttoken))
		self.tokenhistory:insert(self.lasttoken)
		--DEBUG(paranoid): local sofar = self.tokenhistory:concat()
		--DEBUG(paranoid): assert.eq(self.data:sub(1,#sofar), sofar, "source vs tokenhistory")
	end
	return self.lasttoken
end

-- Search forward for 'pattern'.  On a match, consume through the end of the
-- match and return the matched text; the text between the old position and
-- the match start is recorded as 'skipped'.  Returns nil (consuming nothing)
-- when the pattern is not found.
function DataReader:seekpast(pattern)
	--DEBUG(@5): print('DataReader:seekpast', require 'ext.tolua'(pattern))
	local from, to = self.data:find(pattern, self.index)
	if not from then return end
	local skipped = self.data:sub(self.index, from - 1)
	self.index = to + 1
	self:updatelinecol()
	return self:setlasttoken(self.data:sub(from, to), skipped)
end

-- Like seekpast but anchored at the current position (so nothing is skipped).
function DataReader:canbe(pattern)
	--DEBUG(@5): print('DataReader:canbe', require 'ext.tolua'(pattern))
	return self:seekpast('^'..pattern)
end

-- canbe that raises a {msg=...} table on failure (the form Parser:setData
-- catches and reports as a parse error).
function DataReader:mustbe(pattern, msg)
	--DEBUG(@5): print('DataReader:mustbe', require 'ext.tolua'(pattern))
	if not self:canbe(pattern) then error{msg=msg or "expected "..pattern} end
	return self.lasttoken
end

return DataReader
97 |
--------------------------------------------------------------------------------
/base/parser.lua:
--------------------------------------------------------------------------------
local class = require 'ext.class'
local table = require 'ext.table'
local tolua = require 'ext.tolua'

-- Base parser: drives a tokenizer (built by the subclass via :buildTokenizer)
-- and turns the resulting token stream into an AST via :parseTree.
local Parser = class()

-- seems redundant. does anyone need to construct a Parser without data? maybe to modify the syntax or something? just build a subclass in that case?
function Parser:init(data, ...)
	if data then
		assert(self:setData(data, ...))
	end
end

--[[
Tokenize and parse 'data' into self.tree.
data = source text.
source = description of where the data came from (e.g. a filename); stored for locations.
returns
	true upon success
	nil, msg, loc upon failure
Errors thrown as a {msg=...} table are treated as parse errors of the input;
anything else is considered an internal error and re-raised.
--]]
function Parser:setData(data, source)
	assert(data, "expected data")
	data = tostring(data)
	self.source = source
	local t = self:buildTokenizer(data)
	self.t = t

	-- default entry point for parsing data sources
	local parseError
	local result = table.pack(xpcall(function()
		t:start()
		self.tree = self:parseTree()
	end, function(err)
		-- throw an object if it's an error parsing the code
		if type(err) == 'table' then
			--DEBUG(@5):print('got parse error:', require'ext.tolua'(err))
			--DEBUG(@5):print(debug.traceback())
			parseError = err
			return
		else
			-- internal error: decorate with the current position and a traceback
			return err..'\n'
				..self.t:getpos()..'\n'
				..debug.traceback()
		end
	end))
	if not result[1] then
		if not parseError then error(result[2]) end -- internal error
		return false, self.t:getpos()..': '..tostring(parseError.msg) -- parsed code error
	end

	--
	-- now that we have the tree, build parents
	-- ... since I don't do that during construction ...
	if self.ast
	and self.ast.refreshparents
	then
		self.ast.refreshparents(self.tree)
	end

	-- leftover token means the grammar stopped before consuming all input
	if self.t.token then
		return false, self.t:getpos()..": expected eof, found "..self.t.token
	end
	return true
end

-- TODO I don't need all these, just :getloc()
-- Current tokenizer location, augmented with this parser's source description.
function Parser:getloc()
	local loc = self.t:getloc()
	loc.source = self.source
	return loc
end

-- Consume and return the current token when it matches.
-- token is optional (nil = accept any token of 'tokentype'); tokentype is required.
function Parser:canbe(token, tokentype) -- token is optional
	assert(tokentype)
	if (not token or token == self.t.token)
	and tokentype == self.t.tokentype
	then
		self.lasttoken, self.lasttokentype = self.t.token, self.t.tokentype
		self.t:consume()
		return self.lasttoken, self.lasttokentype
	end
end

-- canbe that raises a {msg=...} parse error on mismatch.
-- opentoken/openloc (optional) name a still-open construct and where it was
-- opened, for a better "to close X at ..." error message.
function Parser:mustbe(token, tokentype, opentoken, openloc)
	local lasttoken, lasttokentype = self.t.token, self.t.tokentype
	self.lasttoken, self.lasttokentype = self:canbe(token, tokentype)
	if not self.lasttoken then
		local msg = "expected token="..tolua(token).." tokentype="..tolua(tokentype)
			.." but found token="..tolua(lasttoken).." type="..tolua(lasttokentype)
		if opentoken then
			msg = msg .. " to close "..tolua(opentoken).." at line="..openloc.line..' col='..openloc.col
		end
		error{msg=msg}
	end
	return self.lasttoken, self.lasttokentype
end

-- make new ast node, assign it back to the parser (so it can tell what version / keywords / etc are being used)
function Parser:node(index, ...)
	--DEBUG(@5):print('Parser:node', index, ...)
	local node = self.ast[index](...)
	node.parser = self
	return node
end

-- used with parse_expr_precedenceTable
-- Try each rule's token against the current token; on a match, consume it and
-- return the matching rule.  Keyword-vs-symbol type is derived from the
-- token's spelling, so list order only matters for readability (matching is
-- by exact token equality).
function Parser:getNextRule(rules)
	for _, rule in pairs(rules) do
		-- TODO why even bother separate it in canbe() ?
		local keywordOrSymbol = rule.token:match'^[_a-zA-Z][_a-zA-Z0-9]*$' and 'keyword' or 'symbol'
		if self:canbe(rule.token, keywordOrSymbol) then
			return rule
		end
	end
end

-- a useful tool for specifying lots of precedence level rules
-- used with self.parseExprPrecedenceRulesAndClassNames
-- example in parser/lua/parser.lua
--
-- i = index into the precedence table; level i recurses into level i+1, or
-- into parse_subexp() at the bottom, so lower indices bind loosest.
-- unaryLHS levels consume their operator first and then the operand;
-- other levels are binary: left operand first, then the operator.
-- A rule's .nextLevel may redirect the recursion to a named level.
function Parser:parse_expr_precedenceTable(i)
	--DEBUG(@5):print('Parser:parse_expr_precedenceTable', i, 'of', #self.parseExprPrecedenceRulesAndClassNames, 'token=', self.t.token)
	local precedenceLevel = self.parseExprPrecedenceRulesAndClassNames[i]
	if precedenceLevel.unaryLHS then
		-- NOTE(review): 'from' is unused below; the span start is taken from
		-- the operand's own span instead -- confirm whether that is intended.
		local from = self:getloc()
		local rule = self:getNextRule(precedenceLevel.rules)
		if rule then
			local nextLevel = i
			if rule.nextLevel then
				nextLevel = self.parseExprPrecedenceRulesAndClassNames:find(nil, function(level)
					return level.name == rule.nextLevel
				end) or error{msg="couldn't find precedence level named "..tostring(rule.nextLevel)}
			end
			local a = assert(self:parse_expr_precedenceTable(nextLevel), {msg='unexpected symbol'})
			a = self:node(rule.className, a)
			if a.span then
				a:setspan{from = a.span.from, to = self:getloc()}
			end
			return a
		end

		-- no unary operator here: fall through to the next-tighter level
		if i < #self.parseExprPrecedenceRulesAndClassNames then
			return self:parse_expr_precedenceTable(i+1)
		else
			return self:parse_subexp()
		end
	else
		-- binary operation by default
		local a
		if i < #self.parseExprPrecedenceRulesAndClassNames then
			a = self:parse_expr_precedenceTable(i+1)
		else
			a = self:parse_subexp()
		end
		if not a then return end
		local rule = self:getNextRule(precedenceLevel.rules)
		if rule then
			local nextLevel = i
			if rule.nextLevel then
				nextLevel = self.parseExprPrecedenceRulesAndClassNames:find(nil, function(level)
					return level.name == rule.nextLevel
				end) or error{msg="couldn't find precedence level named "..tostring(rule.nextLevel)}
			end
			a = self:node(rule.className, a, (assert(self:parse_expr_precedenceTable(nextLevel), {msg='unexpected symbol'})))
			if a.span then
				a:setspan{from = a.span.from, to = self:getloc()}
			end
		end
		return a
	end
end



return Parser
173 |
--------------------------------------------------------------------------------
/base/tokenizer.lua:
--------------------------------------------------------------------------------
1 | local table = require 'ext.table'
2 | local string = require 'ext.string'
3 | local class = require 'ext.class'
4 | local assert = require 'ext.assert'
5 | local DataReader = require 'parser.base.datareader'
6 |
-- Base tokenizer class; subclasses configure symbols/keywords (see below).
local Tokenizer = class()

-- Hook for subclasses to populate self.symbols / self.keywords.
-- Receives the extra arguments passed to Tokenizer:init.  Base version is a no-op.
function Tokenizer:initSymbolsAndKeywords(...)
end
11 |
-- data = source text; extra args are forwarded to initSymbolsAndKeywords.
-- Tokenizing runs inside a coroutine: each parse* method yields
-- (token, tokentype) pairs for the consumer to pull one at a time.
function Tokenizer:init(data, ...)
	-- TODO move what this does to just the subclass initialization
	self.symbols = table(self.symbols)
	self.keywords = table(self.keywords):setmetatable(nil)
	self:initSymbolsAndKeywords(...)

	self.r = DataReader(data)
	self.gettokenthread = coroutine.create(function()
		local r = self.r

		while not r:done() do
			self:skipWhiteSpaces()
			if r:done() then break end

			-- try each token category in order; the first that consumes wins
			if self:parseComment() then
			elseif self:parseString() then
			elseif self:parseName() then
			elseif self:parseNumber() then
			elseif self:parseSymbol() then
			else
				error{msg="unknown token "..r.data:sub(r.index, r.index+20)..(r.index+20 > #r.data and '...' or '')}
			end
		end
	end)
end
37 |
-- Consume any run of whitespace at the current read position.
function Tokenizer:skipWhiteSpaces()
	self.r:canbe'%s+'
	--DEBUG(@5): if self.r.lasttoken then print('read space ['..(self.r.index-#self.r.lasttoken)..','..self.r.index..']: '..self.r.lasttoken) end
end
43 |
-- Lua-style comments; the leader lives in .singleLineComment so it can be swapped out easily.
Tokenizer.singleLineComment = string.patescape'--'

-- Try to consume one comment: block form first, then single-line.
-- Returns true when a comment was consumed.
function Tokenizer:parseComment()
	if self:parseBlockComment() then return true end

	local r = self.r
	if not r:canbe(self.singleLineComment) then return end

	-- single-line comment: swallow through the next newline, or to EOF if none
	if not r:seekpast'\n' then
		r:seekpast'$'
	end
	-- TODO how to insert comments into the AST? should they be their own nodes?
	-- should all whitespace be its own node, so the original code text can be reconstructed exactly?
	--coroutine.yield(commentstr, 'comment')
	return true
end
66 |
-- Try to consume one string literal; returns true on success.
-- The base class only knows quote-delimited strings; subclasses may extend this.
function Tokenizer:parseString()
	local gotQuoteString = self:parseQuoteString()
	if gotQuoteString then return true end
end
71 |
-- TODO this is a very lua function though it's in parser/base/ and not parser/lua/ ...
-- '' or "" single-line quote-strings with escape-codes
-- Yields the *decoded* string contents tagged 'string' and returns true;
-- returns nil (consuming nothing) when the next char isn't a quote.
function Tokenizer:parseQuoteString()
	local r = self.r
	if r:canbe'["\']' then
		--DEBUG(@5): print('read quote string ['..(r.index-#r.lasttoken)..','..r.index..']: '..r.lasttoken)
		--DEBUG(@5): local start = r.index-#r.lasttoken
		local quote = r.lasttoken
		local s = table()
		-- read one char at a time until the matching close quote
		while true do
			r:seekpast'.'
			if r.lasttoken == quote then break end
			if r:done() then error{msg="unfinished string"} end
			if r.lasttoken == '\\' then
				local esc = r:canbe'.'
				-- single-char escapes; escaped CR/LF both decode as a newline
				local escapeCodes = {a='\a', b='\b', f='\f', n='\n', r='\r', t='\t', v='\v', ['\\']='\\', ['"']='"', ["'"]="'", ['0']='\0', ['\r']='\n', ['\n']='\n'}
				local escapeCode = escapeCodes[esc]
				if escapeCode then
					s:insert(escapeCode)
				elseif esc == 'x' and self.version >= '5.2' then
					-- \xXX: exactly two hex digits (Lua 5.2+)
					esc = r:mustbe'%x' .. r:mustbe'%x'
					s:insert(string.char(tonumber(esc, 16)))
				elseif esc == 'u' and self.version >= '5.3' then
					-- \u{XXX}: unicode codepoint (Lua 5.3+), UTF-8 encoded below
					r:mustbe'{'
					local code = 0
					while true do
						local ch = r:canbe'%x'
						if not ch then break end
						code = code * 16 + tonumber(ch, 16)
					end
					r:mustbe'}'

					-- hmm, needs bit library or bit operations, which should only be present in version >= 5.3 anyways so ...
					local bit = bit32 or require 'bit'
					if code < 0x80 then
						s:insert(string.char(code)) -- 0xxxxxxx
					elseif code < 0x800 then
						-- 110xxxxx 10xxxxxx
						s:insert(
							string.char(bit.bor(0xc0, bit.band(0x1f, bit.rshift(code, 6))))
							.. string.char(bit.bor(0x80, bit.band(0x3f, code)))
						)
					elseif code < 0x10000 then
						-- 1110xxxx 10xxxxxx 10xxxxxx
						s:insert(
							string.char(bit.bor(0xe0, bit.band(0x0f, bit.rshift(code, 12))))
							.. string.char(bit.bor(0x80, bit.band(0x3f, bit.rshift(code, 6))))
							.. string.char(bit.bor(0x80, bit.band(0x3f, code)))
						)
					else
						-- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
						s:insert(
							string.char(bit.bor(0xf0, bit.band(0x07, bit.rshift(code, 18))))
							.. string.char(bit.bor(0x80, bit.band(0x3f, bit.rshift(code, 12))))
							.. string.char(bit.bor(0x80, bit.band(0x3f, bit.rshift(code, 6))))
							.. string.char(bit.bor(0x80, bit.band(0x3f, code)))
						)
					end
				elseif esc:match('%d') then
					-- \ddd: decimal byte value, up to three digits total
					-- can read up to three
					if r:canbe'%d' then esc = esc .. r.lasttoken end
					if r:canbe'%d' then esc = esc .. r.lasttoken end
					s:insert(string.char(tonumber(esc)))
				else
					if self.version >= '5.2' then
						-- lua5.1 doesn't care about bad escape codes
						error{msg="invalid escape sequence "..esc}
					end
				end
			else
				s:insert(r.lasttoken)
			end
		end
		--DEBUG(@5): print('read quote string ['..start..','..(r.index-#r.lasttoken)..']: '..r.data:sub(start, r.index-#r.lasttoken))
		coroutine.yield(s:concat(), 'string')
		return true
	end
end
147 |
-- C-style identifiers: a letter or underscore followed by word characters.
-- Yields the token tagged 'keyword' if it is in self.keywords, else 'name'.
function Tokenizer:parseName()
	local r = self.r
	if not r:canbe'[%a_][%w_]*' then return end
	local word = r.lasttoken
	local tokentype = self.keywords[word] and 'keyword' or 'name'
	coroutine.yield(word, tokentype)
	return true
end
157 |
-- Try to read a number literal at the current position.
-- Dispatches to parseHexNumber for 0x/0X prefixes, else parseDecNumber.
-- Returns true when a number token was produced.
function Tokenizer:parseNumber()
	local r = self.r
	-- a number starts with a digit, or with a '.' immediately followed by a digit;
	-- a lone '.' (or '..', '...') must fall through so it can be lexed as a symbol
	if r.data:match('^[%.%d]', r.index) -- if it's a decimal or a number...
	and (r.data:match('^%d', r.index) -- then, if it's a number it's good
	or r.data:match('^%.%d', r.index)) -- or if it's a decimal then if it has a number following it then it's good ...
	then -- otherwise I want it to continue to the next 'else'
		-- lua doesn't consider the - to be a part of the number literal
		-- instead, it parses it as a unary - and then possibly optimizes it into the literal during ast optimization
		--DEBUG(@5): local start = r.index
		if r:canbe'0[xX]' then
			self:parseHexNumber()
		else
			self:parseDecNumber()
		end
		--DEBUG(@5): print('read number ['..start..', '..r.index..']: '..r.data:sub(start, r.index-1))
		return true
	end
end
176 |
-- Read the digit run of a hex literal; parseNumber() has already consumed the
-- '0x'/'0X' prefix.  Yields the literal with a normalized lowercase '0x' prefix.
function Tokenizer:parseHexNumber()
	local digits = self.r:mustbe('[%da-fA-F]+', 'malformed number')
	coroutine.yield('0x'..digits, 'number')
end
182 |
-- Read a decimal number literal: digits with at most one decimal point,
-- optionally followed by an exponent.  Yields the whole literal as a 'number' token.
function Tokenizer:parseDecNumber()
	local r = self.r
	local token = r:canbe'[%.%d]+'
	-- at most one '.' allowed in the mantissa
	assert.le(#token:gsub('[^%.]',''), 1, 'malformed number')
	local n = table{token}
	-- FIX: the exponent marker previously only matched lowercase 'e' (so '1E5'
	-- tokenized as number '1' followed by name 'E5'), and the exponent pattern
	-- '[%+%-]%d+' demanded a sign (so '1e5' raised 'malformed number').
	-- Lua allows 'e' or 'E' with an optional sign.
	if r:canbe'[eE]' then
		n:insert(r.lasttoken)
		n:insert(r:mustbe('[%+%-]?%d+', 'malformed number'))
	end
	coroutine.yield(n:concat(), 'number')
end
194 |
-- Try to read one of the language's symbols at the current position.
-- self.symbols is sorted longest-first in :start(), so the first hit is the longest match.
-- Returns true when a symbol token was produced.
function Tokenizer:parseSymbol()
	local reader = self.r
	for _,sym in ipairs(self.symbols) do
		if reader:canbe(string.patescape(sym)) then
			coroutine.yield(reader.lasttoken, 'symbol')
			return true
		end
	end
end
206 |
207 | -- separate this in case someone has to modify the tokenizer symbols and keywords before starting
208 | function Tokenizer:start()
209 | -- TODO provide tokenizer the AST namespace and have it build the tokens (and keywords?) here automatically
210 | self.symbols = self.symbols:mapi(function(v,k) return true, v end):keys()
211 | -- arrange symbols from largest to smallest
212 | self.symbols:sort(function(a,b) return #a > #b end)
213 | self:consume()
214 | self:consume()
215 | end
216 |
-- Advance the token stream by one: the previous lookahead pair becomes
-- self.token/self.tokentype, then the tokenizer coroutine is resumed to
-- produce a fresh lookahead pair in self.nexttoken/self.nexttokentype.
function Tokenizer:consume()
	-- [[ TODO store these in an array somewhere, make the history adjustable
	-- then in all the get[prev][2]loc's just pass an index for how far back to search
	self.prev2index = self.previndex
	self.prev2tokenIndex = self.prevtokenIndex

	self.previndex = self.r.index
	self.prevtokenIndex = #self.r.tokenhistory+1
	--]]

	-- shift lookahead into the current token
	self.token = self.nexttoken
	self.tokentype = self.nexttokentype
	if coroutine.status(self.gettokenthread) == 'dead' then
		-- tokenizer coroutine finished: no more lookahead
		self.nexttoken = nil
		self.nexttokentype = nil
		-- done = true
		return
	end
	local status, nexttoken, nexttokentype = coroutine.resume(self.gettokenthread)
	-- detect errors
	if not status then
		local err = nexttoken
		--[[ enabling this to forward errors wasn't so foolproof...
		if type(err) == 'table' then
		--]]
			-- then repackage it and include our parser state
			-- NOTE(review): if the coroutine raised a plain string error, 'err.msg'
			-- will itself error here (indexing a string) -- confirm all tokenizer
			-- errors are raised as tables with a .msg field.
			error{
				msg = err.msg,
				token = self.token,
				tokentype = self.tokentype,
				pos = self:getpos(),
				traceback = debug.traceback(self.gettokenthread),
			}
		--[[ see above
		else
			-- internal error - just rethrow
			error(err)
		end
		--]]
	end
	self.nexttoken = nexttoken
	self.nexttokentype = nexttokentype
end
260 |
-- Human-readable reader position: line, column, and the rest of the current source line.
function Tokenizer:getpos()
	local reader = self.r
	local restOfLine = reader.data:sub(reader.index):match'^[^\n]*'
	return 'line '..reader.line..' col '..reader.col..' code "'..restOfLine..'"'
end
266 |
267 | -- return the span across
-- Build a location record for the start of the most-recently-consumed span:
-- the reader's current line/col plus the byte index and token-history index
-- saved two consume()s ago (see :consume()).
function Tokenizer:getloc()
	-- FIX: removed an unused local 'r' and two pass-through locals
	return {
		line = self.r.line,
		col = self.r.col,
		index = self.prev2index,
		tokenIndex = self.prev2tokenIndex,
	}
end
280 |
281 | return Tokenizer
282 |
--------------------------------------------------------------------------------
/distinfo:
--------------------------------------------------------------------------------
1 | name = "parser"
2 | files = {
3 | ["LICENSE"] = "parser/LICENSE",
4 | ["README.md"] = "parser/README.md",
5 | ["base/ast.lua"] = "parser/base/ast.lua",
6 | ["base/datareader.lua"] = "parser/base/datareader.lua",
7 | ["base/parser.lua"] = "parser/base/parser.lua",
8 | ["base/tokenizer.lua"] = "parser/base/tokenizer.lua",
9 | ["grammar/parser.lua"] = "parser/grammar/parser.lua",
10 | ["grammar/tokenizer.lua"] = "parser/grammar/tokenizer.lua",
11 | ["load_xform.lua"] = "parser/load_xform.lua",
12 | ["lua/ast.lua"] = "parser/lua/ast.lua",
13 | ["lua/parser.lua"] = "parser/lua/parser.lua",
14 | ["lua/tokenizer.lua"] = "parser/lua/tokenizer.lua",
15 | ["parser.rockspec"] = "parser/parser.rockspec",
16 | ["parser.lua"] = "parser/parser.lua",
17 | ["syntax_5.0.txt"] = "parser/syntax_5.0.txt",
18 | ["syntax_5.1.txt"] = "parser/syntax_5.1.txt",
19 | ["syntax_5.2.txt"] = "parser/syntax_5.2.txt",
20 | ["syntax_5.3.txt"] = "parser/syntax_5.3.txt",
21 | ["syntax_5.4.txt"] = "parser/syntax_5.4.txt",
22 | ["syntax_ast_5.1.txt"] = "parser/syntax_ast_5.1.txt",
23 | ["syntax_grammar.txt"] = "parser/syntax_grammar.txt",
24 | ["tests/flatten.lua"] = "parser/tests/flatten.lua",
25 | ["tests/lua_to_c.lua"] = "parser/tests/lua_to_c.lua",
26 | ["tests/lua_to_c_test.lua"] = "parser/tests/lua_to_c_test.lua",
27 | ["tests/parse.lua"] = "parser/tests/parse.lua",
28 | ["tests/parsemyself.lua"] = "parser/tests/parsemyself.lua",
29 | ["tests/spantest.lua"] = "parser/tests/spantest.lua",
30 | ["tests/strings.lua"] = "parser/tests/strings.lua",
31 | ["tests/validate-key.txt"] = "parser/tests/validate-key.txt",
32 | ["tests/validate.lua"] = "parser/tests/validate.lua",
33 | }
34 | deps = {
35 | "bit",
36 | "ext",
37 | "template",
38 | }
39 |
--------------------------------------------------------------------------------
/grammar/parser.lua:
--------------------------------------------------------------------------------
1 | --[[
2 | building a parser from a grammar ...
3 | grammar syntax:
4 |
5 | ::= is used to define an AST node with the name `name`
6 | block ::= chunk
7 |
8 | | means multiple optional rules
9 | binop ::= `+` | `-`
10 |
11 | {} means zero-or-more multiple optional rules
12 |
13 | [] means a single optional rule
14 |
15 | '' means a keyword / symbol ... notice keywords are alphabetic only and symbols are non-alphabetic only. The distinction is to enforce that keywords cannot neighbor one another while symbols can, and though keywords are legal variable names (while symbols are not), variables must be checked to ensure they are not keywords. not sure how I'll mix these ...
16 |
17 | ; means end-of-expression-list
	I was debating a few ways to distinguish rule ends. Options could be:
19 | - Wrap in { }
20 | - Use ; as a terminator
21 | - Prefix rules with "def" or something, because the end of an expression-list is either a | or a new rule.
22 |
23 | Grammar implementation:
24 | 1) scan all rules for all literal strings/keywords. sort them all by size, largest-to-smallest.
25 | 2) need to explicitly define some axiom rules.
26 | For Lua: Name, Numeral, LiteralString
27 |
28 | --]]
29 | local path = require 'ext.path'
30 | local table = require 'ext.table'
31 | local class = require 'ext.class'
32 | local assert = require 'ext.assert'
33 | local tolua = require 'ext.tolua'
34 | local template = require 'template'
35 | local GrammarTokenizer = require 'parser.grammar.tokenizer'
36 | local Parser = require 'parser.base.parser'
37 |
-- Indent every line of a multi-line string by one extra tab
-- (used when nesting generated code inside generated code).
-- FIX: the gsub call is wrapped in parentheses so only the transformed string
-- is returned -- a bare 'return s:gsub(...)' also leaks gsub's second result
-- (the substitution count) into the caller's argument/expression list.
local function tab(s)
	return (s:gsub('\n', '\n\t'))
end
41 |
42 |
43 | -- all grammar ast classes, key'd by rule-name
44 | local ast = {}
45 |
46 | -- hmm ... move this to ASTNode root eventually
47 | ast.refreshparents = require 'parser.lua.ast'.refreshparents
48 |
49 | -- TODO for these rules (and for the rules that GrammarParser code-generates)
50 | -- I might as well create AST objects and override their :getcode() instead of making all the if/else conditions in GrammarParser:getcode()
51 |
52 | local ASTNode = require 'parser.base.ast'
53 |
54 | local GrammarASTNode = ASTNode:subclass()
55 | ast._node = GrammarASTNode
56 |
-- Generic constructor: copy every argument into the node's array part.
-- select() is used so embedded nils don't truncate the argument list.
function GrammarASTNode:init(...)
	local argc = select('#', ...)
	for i = 1, argc do
		self[i] = (select(i, ...))
	end
end
62 |
63 | GrammarASTNode.insert = table.insert
64 | GrammarASTNode.append = table.append
65 |
-- Fallback code generator: each concrete grammar node class overrides :getcode().
-- Reaching this means a node type has no generator yet.
function GrammarASTNode:getcode(parser)
	error("need to handle grammar type "..tolua(self.type).." "..tolua(self))
end
69 |
-- Define a new grammar AST node class for the given rule type
-- and register it in the ast namespace under the key '_<type>'.
local function nodeclass(type)
	local subclass = GrammarASTNode:subclass()
	subclass.type = type
	ast['_'..type] = subclass
	return subclass
end
76 |
-- 'rule' node: [1] = rule name (string), [2] = the rule's expression tree
local _rule = nodeclass'rule'
--[[ how to alias? hmm, don't do this or :isa won't work ...
_rule.__index = function(self, k)
	if k == 'name' then return self[1] end
	if k == 'expr' then return self[2] end
	--return _rule.super.__index(self, k)
	return _rule.super.__index[k]
end
--]]
function _rule:name() return self[1] end
function _rule:expr() return self[2] end
88 |
89 | -- :getcode(parser) will generate the code for inserting into the current created node,
90 | -- the current created node is assumed to be named 'result'
91 |
-- 'or' node: alternation.  Generates parser code that tries each child in turn;
-- a child that appends nothing to 'result' falls through to the next alternative.
-- NOTE(review): the template text below appears to have lost its '<?'/'<?=' open
-- markers somewhere upstream -- confirm against the original template source.
local _or = nodeclass'or'
function _or:getcode(parser)
	return template([[
-- or
-- TODO push rewind point here?
repeat
for _,child in ipairs(node) do
?>
	local oldlen = #result
	do
=tab(tab(child:getcode(parser)))?>
	end
	if #result == oldlen then
		-- nothing was added? then break.
		-- TODO rewind token here?
		break
	end
end
?>until true]],
	{
		node = self,
		parser = parser,
		tab = tab,
	})
end
117 |
-- 'optional' node: a single child that may match zero or one time.
local _optional = nodeclass'optional'
-- optional is only different in that, after the optional code, we don't need any assert()'s / mustbe()'s
function _optional:getcode(parser)
	assert.len(self, 1)
	-- delegate straight to the wrapped node's generator
	return self[1]:getcode(parser)
end
124 |
-- 'multiple' node: zero-or-more repetition ({...} in the grammar).
-- Generates a repeat-until-false loop that keeps matching until a pass
-- appends nothing to 'result'.
-- NOTE(review): like _or, the template text appears to have lost its
-- '<?'/'<?=' open markers -- confirm against the original template source.
local _multiple = nodeclass'multiple'
function _multiple:getcode(parser)
	return template([[
-- multiple
repeat

for i,child in ipairs(node) do
	local chsrc = child
	if ast._optional:isa(chsrc) then
		chsrc = chsrc[1]
	end
?>
	local oldlen = #result
	do
=tab(tab(chsrc:getcode(parser)))?> -- multiple always canbe, ... or is it? ?>
	end
	if #result == oldlen then
		-- didn't get anything
		-- TODO a token rewind maybe?
		break
	end

end
?>until false]],
	{
		node = self,
		parser = parser,
		ast = ast,
		tab = tab,
	})
end
156 |
-- expr just encapsulates multiple child exprs? hmm seems it does close to nothing.
-- 'expr' node: an ordered sequence of child expressions.
local _expr = nodeclass'expr'
function _expr:getcode(parser)
	-- single child: no sequencing needed, generate it directly
	if #self == 1 then return self[1]:getcode(parser) end
	return template([[
-- expr
for i,child in ipairs(node) do
	local chsrc = child
	local canbe
	if ast._optional:isa(chsrc) then
		chsrc = chsrc[1]
		canbe = true
	end
?>=chsrc:getcode(parser, canbe)?>
]],
	{
		node = self,
		parser = parser,
		ast = ast,
		tab = tab,
	})
end
180 |
-- 'capture' node: currently generates no code, only a placeholder comment
-- in the emitted parser source.
local _capture = nodeclass'capture'
function _capture:getcode(parser)
	return '-- capture'
end
185 |
-- 'name' node: a reference to another rule.  Generates a call to that rule's
-- parse method and appends its result to the in-progress 'result' node.
local _name = nodeclass'name'
function _name:getcode(parser)
	assert.len(self, 1)
	local name = assert.type(self[1], 'string')
	-- the referenced rule must exist (or be a builtin axiom rule)
	assert.index(parser.ruleForName, name)
	return 'result:insert(self:parse_'..name..'())'
end
193 |
-- 'string' node: a literal keyword or symbol.  Generates a canbe/mustbe match
-- against the corresponding token type.
local _string = nodeclass'string'
function _string:getcode(parser, canbe)
	assert.len(self, 1)
	local s = assert.type(self[1], 'string')
	-- keyword / symbol
	-- TODO this should be 'mustbe' unless its parent is 'optional' or 'multiple' ...
	-- or maybe don't make that change here, but make it in the parent node that generates this code ...
	local canmust = canbe and 'canbe' or 'mustbe'
	if parser.langKeywords[s] then
		return "self:"..canmust.."('"..s.."', 'keyword')"
	elseif parser.langSymbols[s] then
		return "self:"..canmust.."('"..s.."', 'symbol')"
	else
		error("found a string that wasn't a keyword or a symbol: "..tolua(s))
	end
end
210 |
211 | --[[ hmm why does this get errors about {"stat"} ...
212 | _name.__index = function(self, k)
213 | if k == 'value' then return self[1] end
214 | return _name.super.__index[k]
215 | end
216 |
217 | _number.__index = function(self, k)
218 | if k == 'value' then return self[1] end
219 | return _number.super.__index[k]
220 | end
221 |
222 | _string.__index = function(self, k)
223 | if k == 'value' then return self[1] end
224 | return _string.super.__index[k]
225 | end
226 | --]]
227 |
-- accessor for the single payload element of leaf nodes
function _name:value() return self[1] end
function _string:value() return self[1] end
--function _number:value() return self[1] end
231 |
232 |
233 | local GrammarParser = Parser:subclass()
234 | GrammarParser.ast = ast
235 |
-- static method, call with :
-- Read the grammar file at 'fn' and construct a GrammarParser from its contents.
function GrammarParser:fromFile(fn)
	return self(assert(path(fn):read()), fn)
end
240 |
-- Factory override: grammar files are tokenized with GrammarTokenizer.
function GrammarParser:buildTokenizer(data)
	return GrammarTokenizer(data)
end
244 |
-- Parse the grammar source (via the superclass), then post-process the rule tree:
-- index rules by name, collect keywords/symbols, build and collapse a token DAG,
-- and finally print generated parser source code.
function GrammarParser:setData(data, source, ...)
	GrammarParser.super.setData(self, data, source, ...)

	-- now we should have our self.tree
	-- from here we can convert it into a parse structure
	-- our first rule will be the start, i.e. :parseTree()
	-- subsequent rules become member functions

	self.ruleForName = {}
	-- builtin rules
	self.ruleForName.Name = true
	self.ruleForName.LiteralString = true
	self.ruleForName.Numeral = true
	for _,rule in ipairs(self.tree) do
		assert.len(rule, 2)
		self.ruleForName[rule:name()] = rule
	end

	-- while we're here, traverse all rules and pick out all symbols and keywords
	self.langKeywords = {}
	self.langSymbols = {}
	local function process(node)
		if ast._name:isa(node) then
			assert.len(node, 1)
			-- names in the grammar should always point to either other rules, or to builtin axiomatic rules (Name, Numeric, LiteralString)
			local name = assert.type(node:value(), 'string')
			local rule = self.ruleForName[name]
			if not rule then
				error("rule referenced but not defined: "..tolua(name))
			end
			-- TODO replace the element in the table with the AST? that'd remove the DAG property of the AST. no more pretty `tolua()` output.
		elseif ast._string:isa(node) then
			assert.len(node, 1)
			local s = assert.type(node:value(), 'string')

			-- keywords vs symbols are parsed separately
			-- keywords must be space-separated, and for now are only letters -- no symbol characters used (allowed?)
			-- symbols don't have to be space-separated and for now cannot be letters
			if s:find'%A' then
				assert(not s:find'%a')
				self.langSymbols[s] = true
			else
				self.langKeywords[s] = true
			end
		end

		-- recurse into all table-valued children
		for i,child in ipairs(node) do
			if type(child) == 'table' then
				process(child)
			end
		end
	end
	for _,rule in ipairs(self.tree) do
		process(rule)
	end

	local validTokenTypes = {
		start = true,
		['end'] = true,
		keyword = true,	-- word, unquoted, reserved token
		name = true,	-- word, unquoted, not-reserved
		symbol = true,	-- non-alphanumeric token
		number = true,	-- number
		string = true,
	}

	-- NOTE(review): tokenAndTypeToStr appears unused in this function -- confirm
	local function tokenAndTypeToStr(tokenPair)
		return table.concat(tokenPair, ':')
	end

	--[[ construct DAG ...
	assert.is(self.tree[1], ast._rule)
	for _,nextTokenPair in ipairs(addRule({'start'}, self.tree[1])) do
		addEdge(nextTokenPair, {'end'})
	end
	--]]
	-- [[
	-- combine(...): union several node-name lists, deduplicated via a set
	local function combine(...)
		return table():append(...):mapi(function(v)
			return true, v
		end):keys()
	end
	-- addEdges: connect every 'from' node to every 'to' node in the digraph
	local function addEdges(edges, froms, tos)
		for _,from in ipairs(froms) do
			edges[from] = edges[from] or {}
			for _,to in ipairs(tos) do
				print('adding edge', from, to)
				edges[from][to] = true
			end
		end
	end
	local addFromsToRule
	-- addFromsToNode: add this node's token transitions to the digraph,
	-- returning the set of exit nodes reachable after matching it
	local function addFromsToNode(edges, froms, node)
		assert.type(froms, 'table')
		--print('addFromsToNode', require 'ext.tolua'(froms), node.type)
		if ast._expr:isa(node) then
			-- "expr" is really "list" or "container of other nodes"
			for _,ch in ipairs(node) do
				assert.is(ch, ast._node)
				froms = addFromsToNode(edges, froms, ch)
			end
			return froms
		elseif ast._multiple:isa(node) then
			--[[
			multiple means ...
			froms -> start(node)
			end(node) -> start(node)
			end(node) -> tos
			--]]
			assert.len(node, 1)
			local mult = node[1]
			local firstfroms = froms
			froms = addFromsToNode(edges, froms, mult)
			addFromsToNode(edges, froms, mult)	-- from end to beginning ... output should match 'froms'
			return combine(firstfroms, froms)	-- combine for when there's 0
		elseif ast._optional:isa(node) then
			--[[
			froms -> optional
			optional -> tos
			froms -> tos
			... same as multiple without the loop back
			--]]
			assert.len(node, 1)
			local opt = node[1]
			local firstfroms = froms
			froms = addFromsToNode(edges, froms, opt)
			return combine(firstfroms, froms)	-- combine for when we skip it
		elseif ast._or:isa(node) then
			--[[
			froms -> start of each child of node
			end of each child of node -> tos
			--]]
			local tos = table()
			for _,ch in ipairs(node) do
				tos = combine(tos, addFromsToNode(edges, froms, ch))
			end
			return tos
		elseif ast._name:isa(node) then
			-- name is a rule ... or a builtin rule
			local ruleName = node:value()
			if ruleName == 'LiteralString'
			or ruleName == 'Numeral'
			or ruleName == 'Name'
			then
				local tos = {ruleName}
				addEdges(edges, froms, tos)
				return tos
			else
				local tos = {'rule:'..ruleName}
				addEdges(edges, froms, tos)
				return tos
			end
		elseif ast._string:isa(node) then
			-- string == literal
			local to = assert.type(node:value(), 'string')
			-- TODO why even bother separate it in canbe() ?
			local keywordOrSymbol = to:match'^[_a-zA-Z][_a-zA-Z0-9]*$' and 'keyword' or 'symbol'
			local tos = {keywordOrSymbol..':'..to}
			addEdges(edges, froms, tos)
			return tos
		elseif ast._capture:isa(node) then
			assert.len(node, 1)
			-- TODO where to tell the digraph that we are capturing something ...
			return addFromsToNode(edges, froms, node[1])
		end
		error('here with type '..tostring(node.type))
	end
	function addFromsToRule(edges, froms, rule)
		assert.is(rule, ast._rule)
		print()
		print('adding rule', rule:name())
		return addFromsToNode(edges, froms, rule:expr())
	end

	local edges = {}

	--[[
	each rule gets its own edges[][] digraph
	whose start node is 'start' and end node is 'end'
	--]]
	for i,rule in ipairs(self.tree) do
		assert.is(rule, ast._rule)
		local froms = addFromsToRule(edges, {'start:'..rule:name(), i==1 and 'start' or nil}, rule)
		addEdges(edges, froms, {'end:'..rule:name(), i==1 and 'end' or nil})
	end

	-- NOTE(review): the print() calls here and below are debug output left
	-- enabled -- confirm this is intentional.
	print()
	print'before collapse:'
	for from,tos in pairs(edges) do
		for to,v in pairs(tos) do
			print(from..' -> '..to)
		end
	end

	-- now collapse the rule parts of the graph ...
	for from,tos in pairs(edges) do
		for _,to in ipairs(table.keys(tos)) do

			-- send end:* to wherever rule:* goes
			-- mind you if nobody uses a rule then its end goes nowhere right?
			-- and give the DAG a value at this point to tell it to create this rule
			local ruleName = to:match'^end:(.*)$'
			if ruleName then
				edges[from][to] = nil
				for newto,v in pairs(assert.index(edges, 'rule:'..ruleName)) do
					-- now upon finishing a rule ... we're going to want it to pop out that node, right?
					edges[from][newto] = ruleName
				end
			end
		end
	end
	local stillMoreToCollapse
	repeat
		stillMoreToCollapse = false
		local rulesStillUsed = {}
		for from,tos in pairs(edges) do
			for _,to in ipairs(table.keys(tos)) do
				-- if it goes to rule:* then send it to start:*
				local ruleName = to:match'^rule:(.*)$'
				if ruleName then
					rulesStillUsed[ruleName] = true
					-- TODO if a rule: points to a start: points to a rule: then we could be erasing its dest here ...
					-- so only erase a rule IF you know it's not used anymore ...
					--edges[from][to] = nil
					for newto,v in pairs(assert.index(edges, 'start:'..ruleName)) do
						-- what about rule: that points to rule: ?
						local ruleName2 = newto:match'^rule:(.*)$'
						if ruleName2 and ruleName2 ~= ruleName then
							-- still getting some circlees ...
							print(to.." goes to "..newto)
							stillMoreToCollapse = true
						end
						edges[from][newto] = true
					end
				end
			end
		end
		for from,tos in pairs(edges) do
			for _,to in ipairs(table.keys(tos)) do
				local ruleName = to:match'^rule:(.*)$'
				if ruleName
				--and not rulesStillUsed[ruleName]
				then
					edges[from][to] = nil
				end
			end
		end
	until not stillMoreToCollapse

	-- ... and then remove the rule starts and ends
	for _,from in ipairs(table.keys(edges)) do
		local ruleName = from:match'^start:(.*)$' or from:match'^end:(.*)$' or from:match'^rule:(.*)$'
		if ruleName then
			edges[from] = nil
		end
	end
	--]]

	--[[ list every edge
	for _,from in ipairs(table.keys(edges)) do
		local tos = edges[from]
		for _,to in ipairs(table.keys(tos)) do
			local v = tos[to]
			print(from..' -> '..to..(v ~= true and ' ['..tostring(v)..']' or ''))
		end
	end
	--]]
	-- [[ generate code

	local rootASTClassName = 'LuaASTNode'
	local tokenizerClassName = 'LuaTokenizer'
	local parserClassName = 'LuaParser'

	-- NOTE(review): the template below appears to have lost its '<?'/'<?=' open
	-- markers somewhere upstream -- confirm against the original template source.
	print(template([=[
-- generated by 'parser.grammar' using file "=source?>"
local table = require 'ext.table'
local ASTNode = require 'parser.base.ast'
local Tokenizer = require 'parser.base.tokenizer'

local ast = {}

local =rootASTClassName?> = ASTNode:subclass()

local function nodeclass(args, parent)
	parent = parent or =rootASTClassName?>
	local cl = parent:subclass(args)
	ast['_'..cl.type] = cl
	return cl
end

for _,rule in ipairs(rules) do
?>local _=rule:name()?> = nodeclass{type==tolua(rule:name())?>}
end
?>

local =tokenizerClassName?> = Tokenizer:subclass()

local edges = =tolua(edges)?>

=tokenizerClassName?>.symbols = table(=tolua(table.keys(self.langSymbols))?>)
=tokenizerClassName?>.keywords = =tolua(self.langKeywords)?>

function =parserClassName?>:buildTokenizer(data)
	return =tokenizerClassName?>(data)
end

function =parserClassName?>:parseTree()
	return =parserClassName?>:parse_=rules[1]:name()?>()
end
]=], {
	-- requires above
	table = table,
	tolua = tolua,
	-- self
	self = self,
	-- locals
	edges = edges,
	tab = tab,
	source = source,
	rules = self.tree,
	rootASTClassName = rootASTClassName,
	tokenizerClassName = tokenizerClassName,
	parserClassName = parserClassName,
	}))

	--]]
end
572 |
-- Root parse: read grammar rules until the token stream runs out,
-- returning them as an ext.table of ast._rule nodes.
function GrammarParser:parseTree()
	-- FIX: 'rules' was an accidental global (no 'local'), polluting _G on every
	-- parse; nothing else reads the global (setData's template receives
	-- 'rules = self.tree' explicitly), so declare it local.
	local rules = table()
	repeat
		if not self.t.token then break end -- nothing left = done

		local rule = self:parseRule()
		if not rule then break end

		-- ';' is an optional rule terminator
		self:canbe(';', 'symbol')
		assert.is(rule, ast._rule)
		rules:insert(rule)
	until false
	return rules
end
587 |
-- Parse one grammar rule of the form `name ::= expr`.
-- Returns an ast._rule holding the name token and the expression tree.
function GrammarParser:parseRule()
	-- can-be + capture + assign 'name'
	local name = self:mustbe(nil, 'name')

	-- must-be + ignore ... do we ever want to capture a must-be? maybe?
	self:mustbe('::=', 'symbol')

	-- TODO i'm overusing and improperly using the term 'expr'
	-- can-be + capture + assign 'expr'
	local expr = self:parseExprOr()

	return ast._rule(name, expr)
end
601 |
-- Parse an expression list and, if a '|' follows, collect the alternatives
-- into a single ast._or node (nested ors from the recursion are flattened in).
function GrammarParser:parseExprOr()
	local expr = self:parseExprList()
	local orexpr

	if self:canbe('|', 'symbol') then
		-- right-recursive: parse the rest of the alternation
		local expr2 = self:parseExprOr()
		if not orexpr then
			orexpr = ast._or(expr)
			expr = orexpr
		end
		if ast._or:isa(expr2) then
			-- merge or's
			for i,child in ipairs(expr2) do
				orexpr:insert(child)
			end
		else
			orexpr:insert(expr2)
		end
	end
	return expr
end
623 |
-- Parse a sequence of expression atoms: {x} repetition, [x] optional,
-- (x) capture, names, numbers, and strings.
-- Returns an ast._expr, unwrapped when it holds exactly one child.
function GrammarParser:parseExprList()
	local expr = ast._expr()
	repeat
		if self:canbe('{', 'symbol') then
			local expr2 = self:parseExprOr()
			--assert(not ast._multiple:isa(expr2)) -- no {{ }} allowed, just { }
			self:mustbe('}', 'symbol')
			expr:insert(ast._multiple(expr2))
		elseif self:canbe('[', 'symbol') then
			local expr2 = self:parseExprOr()
			self:mustbe(']', 'symbol')
			expr:insert(ast._optional(expr2))
		elseif self:canbe('(', 'symbol') then
			local expr2 = self:parseExprOr()
			self:mustbe(')', 'symbol')
			expr:insert(ast._capture(expr2))
		elseif self:canbe(nil, 'name') then
			expr:insert(ast._name(self.lasttoken))
		elseif self:canbe(nil, 'number') then
			-- NOTE(review): ast._number is never created via nodeclass() in this
			-- file, so a number token here would call a nil value -- confirm
			-- whether grammar files can legally contain number tokens.
			expr:insert(ast._number(self.lasttoken))
		elseif self:canbe(nil, 'string') then
			expr:insert(ast._string(self.lasttoken))
		else
			break
		end
	until false
	-- unwrap
	while #expr == 1 and ast._expr:isa(expr) do
		expr = expr[1]
	end
	return expr
end
656 |
657 | -- [[ test:
658 | local syntax51 = GrammarParser:fromFile'syntax_ast_5.1.txt'
659 | --]]
660 |
661 | return GrammarParser
662 |
--------------------------------------------------------------------------------
/grammar/tokenizer.lua:
--------------------------------------------------------------------------------
1 | local Tokenizer = require 'parser.base.tokenizer'
2 |
3 | local GrammarTokenizer = Tokenizer:subclass()
4 |
-- Register the grammar language's punctuation tokens.
-- The grammar syntax itself has no keywords, only symbols.
function GrammarTokenizer:initSymbolsAndKeywords()
	local syms = {'::=', '|', ';', '{', '}', '[', ']', '(', ')'}
	for _,sym in ipairs(syms) do
		self.symbols:insert(sym)
	end
end
10 |
11 | return GrammarTokenizer
12 |
--------------------------------------------------------------------------------
/load_xform.lua:
--------------------------------------------------------------------------------
-- parser.load_xform uses ext.load to modify the load(), loadfile() and require() functions
2 | --DEBUG: local showcode = require 'template.showcode'
3 | local LuaParser = require 'parser.lua.parser'
4 |
-- collection of transform callbacks to run on every parsed tree;
-- unknown keys fall back to ext.table methods (insert, etc)
local callbacks = setmetatable({}, {__index=table})

-- ext.load state: its xforms hook into load()/loadfile()/require()
local state = require 'ext.load'()
callbacks.state = state

-- xform hook: parse incoming source, hand the AST to every registered callback,
-- then re-emit Lua source from the (possibly modified) tree.
state.xforms:insert(function(d, source)
	--DEBUG: print()
	--DEBUG: print(debug.traceback())
	--DEBUG: print'!!! BEFORE PARSE !!!'
	--DEBUG: print('parser.load_xform source: '..source)
	--DEBUG: print(showcode(d))
	--DEBUG: print()

	-- parse the chunk; on failure prefix the message with the tokenizer position when available
	local parser = LuaParser()
	local success, msg = parser:setData(d, source)
	if not success then
		if parser.t then
			msg = parser.t:getpos()..': '..msg
		end
		return nil, msg
	end
	-- let every registered callback inspect/rewrite the AST in-place
	local tree = parser.tree
	for _,cb in ipairs(callbacks) do
		cb(tree, source)
	end
	-- serialize the (possibly modified) tree back to Lua source
	local code = tree:toLua()

	--DEBUG: print()
	--DEBUG: print(debug.traceback())
	--DEBUG: print'!!! AFTER PARSE !!!'
	--DEBUG: print('parser.load_xform source: '..source)
	--DEBUG: print(showcode(code))
	--DEBUG: print()

	return code
end)
41 |
42 | return callbacks
43 |
--------------------------------------------------------------------------------
/lua/ast.lua:
--------------------------------------------------------------------------------
1 | --[[
2 | parser.base.ast returns the BaseAST root of all AST nodes
3 |
4 | TODO ...
5 | ... but parser.lua.ast (and maybe soon parser.grammar.ast) return a collection-of-nodes, which are key'd to the token ... hmm ...
maybe for consistency I should have parser.lua.ast return the LuaAST, which is a BaseAST child, and parent of all Lua AST nodes ...
... and give that node a member that holds a key/value map to all nodes per token ...
8 | But using a namespace is definitely convenient, especially with all the member subclasses and methods that go in it (traverse, nodeclass, etc)
9 | ... though these can easily turn into member fields and member methods
10 |
11 | tempting to replace the 'ast' namespace with just LuaAST itself, and keep the convention that keys beginning with `_` are subclasses...
12 | --]]
13 | local table = require 'ext.table'
14 | local assert = require 'ext.assert'
15 | local tolua = require 'ext.tolua'
16 |
17 | local BaseAST = require 'parser.base.ast'
18 |
19 |
20 | -- namespace table of all Lua AST nodes
21 | -- TODO get rid of parser's dependency on this? or somehow make the relation between parser rules and ast's closer, like generate the AST from the parser-rules?
-- another TODO how about just storing subclasses as `._type` , then the 'ast' usage outside this file can be just exchanged with LuaASTNode itself, and the file can return a class, and lots of things can be simplified
23 | local ast = {}
24 |
25 | -- Lua-specific parent class. root of all other ast node classes in this file.
26 | local LuaAST = BaseAST:subclass()
27 |
28 | -- assign to 'ast.node' to define it as the Lua ast's parent-most node class
29 | ast.node = LuaAST
30 |
31 | --[[
32 | args:
33 | maintainSpan = set to true to have the output maintain the input's span
34 | --]]
35 | local slashNByte = ('\n'):byte()
-- Serialize this subtree by invoking self[field] with a consume() callback.
-- consume() accepts numbers (stringified), strings (appended with minimal
-- spacing), or child AST nodes (recursed into via their own [field] method).
-- Returns the accumulated source string.
function LuaAST:serializeRecursiveMember(field, args)
	local maintainSpan
	if args then
		maintainSpan = args.maintainSpan
	end
	local s = ''
	-- :serialize() impl provided by child classes
	-- :serialize() should call traversal in-order of parsing (why I want to make it auto and assoc with the parser and grammar and rule-generated ast node classes)
	-- that means serialize() itself should never call serialize() but only call the consume() function passed into it (for modularity's sake)
	-- it might mean i should capture all nodes too, even those that are fixed, like keywords and symbols, for the sake of reassembling the syntax
	local line = 1
	local col = 1
	local index = 1
	local consume
	local lastspan
	consume = function(x)
		if type(x) == 'number' then
			x = tostring(x)
		end
		if type(x) == 'string' then
			-- here's our only string join
			-- append() also tracks line/col so output can be padded to match source spans
			local function append(u)
				for i=1,#u do
					if u:byte(i) == slashNByte then
						col = 1
						line = line + 1
					else
						col = col + 1
					end
				end
				index = index + #u
				s = s .. u
			end

			-- TODO here if you want ... pad lines and cols until we match the original location (or exceed it)
			-- to do that, track appended strings to have a running line/col counter just like we do in parser
			-- to do that, separate the updatelinecol() in the parser to work outside datareader
			if maintainSpan and lastspan then
				-- pad newlines until the output line reaches the source span's line
				while line < lastspan.from.line do
					append'\n'
				end
			end

			-- if we have a name coming in, only insert a space if we were already at a name
			local namelhs = s:sub(-1):match'[_%w%.]'
			local namerhs = x:sub(1,1):match'[_%w%.]'
			if namelhs and namerhs then
				append' '
			elseif not namelhs and not namerhs then
				-- TODO here for minification if you want
				-- if we have a symbol coming in, only insert a space if we were already at a symbol and the two together would make a different valid symbol
				-- you'll need to search back the # of the max length of any symbol ...
				append' '
			end
			append(x)
		elseif type(x) == 'table' then
			-- a child AST node: remember its span, then recurse into its serializer
			lastspan = x.span
			assert.is(x, BaseAST)
			assert.index(x, field)
			x[field](x, consume)
		else
			error('here with unknown type '..type(x))
		end
	end
	self[field](self, consume)
	return s
end
103 |
-- External API: serialize this subtree back to Lua source text.
-- args is forwarded to serializeRecursiveMember (e.g. {maintainSpan=true}).
function LuaAST:toLua(args)
	local out = self:serializeRecursiveMember('toLua_recursive', args)
	return out
end

-- why distinguish toLua() and serialize(consume)?
-- serialize(consume) is shared by all language serializations, while
-- toLua_recursive(consume) is the Lua-specific hook, to be overridden by
-- subclasses; toLua() above is the external entry point.
function LuaAST:toLua_recursive(consume)
	return self:serialize(consume)
end
117 |
-- ok maybe it's not such a good idea to use tostring and serialization for the same purpose ...
LuaAST.__tostring = string.nametostring

-- Compile this AST into a callable Lua chunk via load().
-- Extra arguments are forwarded to load() (chunkname, mode, env).
-- On failure returns nil, the load error message, and the generated source.
function LuaAST:exec(...)
	local code = self:toLua()
	local f, msg = load(code, ...)
	if f == nil then
		return nil, msg, code
	end
	return f
end
129 |
130 |
-- TODO what's a more flexible way of iterating through all child fields?
-- and what's a more flexible way of constructing AST node subclass, and of specifying their fields,
-- especially with grammar rule construction?
-- ... how about instead make all fields indexed, and then for certain classes give them aliases into the fields?
-- ... same with htmlparser?
-- then in line with this, fields will either point to nodes, or point to tables to nodes?
-- or maybe the tables-of-nodes should themselves be AST nodes?
-- Registry of named child fields, used by traverseRecurse() and LuaAST.copy().
-- Each entry is {fieldName, kind} where kind is:
--   'field' = plain data: copied by reference, never traversed
--   'one'   = a single child AST node
--   'many'  = a list of child AST nodes
local fields = {
	{'name', 'field'},
	{'index', 'field'},
	{'value', 'field'},
	{'cond', 'one'},
	{'var', 'one'},
	{'min', 'one'},
	{'max', 'one'},
	{'step', 'one'},
	{'func', 'one'}, -- should this be a _function, or a string depicting a function?
	{'arg', 'one'},
	{'key', 'one'},
	{'expr', 'one'},
	{'stmt', 'one'},
	{'args', 'many'},
	{'exprs', 'many'},
	{'elseifs', 'many'},
	{'elsestmt', 'many'},
	{'vars', 'many'},
}

-- also expose exec() on the namespace table
ast.exec = LuaAST.exec
160 |
--[[
I need to fix this up better to handle short-circuiting, replacing, removing, etc...
parentFirstCallback is the parent-first traversal method
childFirstCallback is the child-first traversal
return what value of the callbacks you want
returning a new node at the parent callback will not traverse its subsequent new children added to the tree
--]]
local function traverseRecurse(
	node,
	parentFirstCallback,
	childFirstCallback,
	parentNode
)
	-- non-AST values (strings, numbers, plain tables) pass through untouched
	if not LuaAST:isa(node) then return node end
	if parentFirstCallback then
		local ret = parentFirstCallback(node, parentNode)
		if ret ~= node then
			-- the callback replaced (or removed) the node:
			-- return the replacement without traversing into it
			return ret
		end
	end
	if type(node) == 'table' then
		-- treat the object itself like an array of many
		for i=1,#node do
			node[i] = traverseRecurse(node[i], parentFirstCallback, childFirstCallback, node)
		end
		-- then recurse into the registered named child fields
		for _,field in ipairs(fields) do
			local name = field[1]
			local howmuch = field[2]
			if node[name] then
				if howmuch == 'one' then
					node[name] = traverseRecurse(node[name], parentFirstCallback, childFirstCallback, node)
				elseif howmuch == 'many' then
					local value = node[name]
					-- iterate backwards so in-place replacement doesn't skip entries
					for i=#value,1,-1 do
						value[i] = traverseRecurse(value[i], parentFirstCallback, childFirstCallback, node)
					end
				elseif howmuch == 'field' then
					-- plain data, not a child node: nothing to traverse
				else
					error("unknown howmuch "..howmuch)
				end
			end
		end
	end
	if childFirstCallback then
		node = childFirstCallback(node, parentNode)
	end
	return node
end
209 |
-- Re-link .parent on every node in the subtree rooted at `node`.
function ast.refreshparents(node)
	traverseRecurse(node, function(n, p)
		n.parent = p
		return n
	end)
end

-- Public traversal wrapper: run traverseRecurse, then rebuild parent links
-- (callbacks may have replaced nodes), and return the possibly-new root.
local function traverse(node, ...)
	local result = traverseRecurse(node, ...)
	ast.refreshparents(result)
	return result
end

LuaAST.traverse = traverse
ast.traverse = traverse
225 |
-- Deep-copy an AST node: same metatable, integer children copied recursively,
-- registered named fields copied per their kind ('one'/'many' recurse into
-- tables, 'field' is copied by reference).
function LuaAST.copy(n)
	local out = setmetatable({}, getmetatable(n))
	for i = 1, #n do
		out[i] = LuaAST.copy(n[i])
	end
	for _, fieldInfo in ipairs(fields) do
		local name, kind = fieldInfo[1], fieldInfo[2]
		local value = n[name]
		if value then
			if kind == 'one' then
				-- single child: recurse if it's a table, else copy by value
				if type(value) == 'table' then
					out[name] = LuaAST.copy(value)
				else
					out[name] = value
				end
			elseif kind == 'many' then
				-- list of children: copy element-wise
				local copies = {}
				for i, child in ipairs(value) do
					if type(child) == 'table' then
						copies[i] = LuaAST.copy(child)
					else
						copies[i] = child
					end
				end
				out[name] = copies
			elseif kind == 'field' then
				-- plain data: shallow copy
				out[name] = value
			else
				error("unknown howmuch "..kind)
			end
		end
	end
	return out
end
ast.copy = LuaAST.copy
263 |
--[[
flatten a function:
for all its calls, insert them as statements inside the function
this is only possible if the called functions are of a specific form...
varmap is the mapping from function names to _function objects to inline in the _call's place


if the nested function ends with a return ...
... then insert its declarations (for var remapping) into a statement just before the one with this call
... and wrap our return contents in parenthesis ... or make general use of ()'s everywhere (for resolution order)

f stmt
f stmt
f stmt
return something(g(...), h(...))

becomes

f stmt
f stmt
f stmt
local g ret
g stmt
g stmt
g stmt
g ret = previous return value of h
local h ret
h stmt
h stmt
h stmt
h ret = previous return value of h
return something(g ret, h ret)

--]]
function LuaAST.flatten(f, varmap)
	-- operate on a deep copy so the caller's tree is untouched
	f = LuaAST.copy(f)
	traverseRecurse(f, function(n)
		if type(n) == 'table'
		and ast._call:isa(n)
		then
			local funcname = n.func:toLua() -- in case it's a var ... ?
			assert(funcname, "can't flatten a function with anonymous calls")
			local f = varmap[funcname]
			-- only inline callees whose entire body is a single return statement
			if f
			and #f == 1
			and ast._return:isa(f[1])
			then
				local retexprs = {}
				for i,e in ipairs(f[1].exprs) do
					retexprs[i] = LuaAST.copy(e)
					-- NOTE(review): this callback returns nil for non-_arg nodes, so
					-- traverseRecurse's `ret ~= node` check makes it return early without
					-- visiting children; nested _arg nodes may never be substituted, and
					-- the traversal's return value is discarded -- verify against tests/flatten.lua
					traverseRecurse(retexprs[i], function(v)
						-- _arg is not used by parser - externally used only - I should move flatten somewhere else ...
						if ast._arg:isa(v) then
							-- NOTE(review): substitutes the i-th *return expression*'s args slot
							-- (n.args[i]) rather than v.index -- confirm this is intended
							return LuaAST.copy(n.args[i])
						end
					end)
					-- parenthesize so evaluation order / multiple-return truncation is preserved
					retexprs[i] = ast._par(retexprs[i])
				end
				return ast._block(table.unpack(retexprs)) -- TODO exprlist, and redo assign to be based on vars and exprs
			end
		end
		return n
	end)
	return f
end
ast.flatten = LuaAST.flatten
330 |
-- Feed each element of sequence `t` to consume(), emitting the optional
-- separator token `sep` between consecutive elements (never after the last).
local function consumeconcat(consume, t, sep)
	local count = #t
	for position, element in ipairs(t) do
		consume(element)
		if sep and position < count then
			consume(sep)
		end
	end
end
339 |
-- Emit statements one after another with no separator token.
local function spacesep(list, consume)
	return consumeconcat(consume, list)
end

-- Emit expressions separated by ',' tokens.
local function commasep(list, consume)
	return consumeconcat(consume, list, ',')
end
347 |
-- Create and register an AST node class named `type`.
-- parent defaults to LuaAST; the class is exposed as ast['_'..type].
local function nodeclass(type, parent, args)
	local base = parent or LuaAST
	local cl = base:subclass(args)
	cl.type = type
	cl.__name = type
	ast['_'..type] = cl
	return cl
end
ast.nodeclass = nodeclass
357 |
-- helper: does `s` form a valid Lua identifier?
-- (leading letter or underscore, then word characters only)
local function isLuaName(s)
	return s:match('^[_%a][_%w]*$')
end
-- May `key` be written as a bare name (tbl.key / {key=...})?
-- It must be a _string node whose value is a valid identifier, and — when a
-- parser is supplied — must not collide with that parser's tokenizer keywords.
-- Without a parser (dynamic AST construction) the keyword check is skipped.
function ast.keyIsName(key, parser)
	local isStringNode = ast._string:isa(key)
	return isStringNode
		and isLuaName(key.value)
		and (not parser or not parser.t.keywords[key.value])
end
373 |
-- generic global stmt collection
local _block = nodeclass'block'
-- store each vararg statement at its integer index
function _block:init(...)
	local stmtCount = select('#', ...)
	for stmtIndex = 1, stmtCount do
		self[stmtIndex] = select(stmtIndex, ...)
	end
end
-- emit the statements back-to-back
function _block:serialize(consume)
	spacesep(self, consume)
end
384 |
--statements

-- abstract base class for all statement nodes
local _stmt = nodeclass'stmt'

-- multi-assignment: vars = exprs
-- TODO 'vars' and 'exprs' should be nodes themselves ...
local _assign = nodeclass('assign', _stmt)
function _assign:init(vars, exprs)
	self.vars = table(vars)
	self.exprs = table(exprs)
end
function _assign:serialize(consume)
	commasep(self.vars, consume)
	consume('=')
	commasep(self.exprs, consume)
end
400 |
-- do ... end block statement.
-- Design note: no construction constraint like _do(_block(...)) is imposed;
-- statements go straight into integer indexes, which makes _do and _block
-- structurally identical apart from the emitted keywords.
local _do = nodeclass('do', _stmt)
function _do:init(...)
	local stmtCount = select('#', ...)
	for stmtIndex = 1, stmtCount do
		self[stmtIndex] = select(stmtIndex, ...)
	end
end
function _do:serialize(consume)
	consume('do')
	spacesep(self, consume)
	consume('end')
end
417 |
-- while <cond> do <body> end
local _while = nodeclass('while', _stmt)
-- TODO just make self[1] into the cond ...
function _while:init(cond, ...)
	self.cond = cond
	local stmtCount = select('#', ...)
	for stmtIndex = 1, stmtCount do
		self[stmtIndex] = select(stmtIndex, ...)
	end
end
function _while:serialize(consume)
	consume('while')
	consume(self.cond)
	consume('do')
	spacesep(self, consume)
	consume('end')
end
433 |
-- repeat <body> until <cond>
local _repeat = nodeclass('repeat', _stmt)
-- TODO just make self[1] into the cond ...
function _repeat:init(cond, ...)
	self.cond = cond
	local stmtCount = select('#', ...)
	for stmtIndex = 1, stmtCount do
		self[stmtIndex] = select(stmtIndex, ...)
	end
end
function _repeat:serialize(consume)
	consume('repeat')
	spacesep(self, consume)
	consume('until')
	consume(self.cond)
end
448 |
--[[
_if(_eq(a,b),
	_assign({a},{2}),
	_elseif(...),
	_elseif(...),
	_else(...))
--]]
-- weird one, idk how to reformat
local _if = nodeclass('if', _stmt)
-- TODO maybe just assert the node types and store them as-is in self[i]
-- init sorts its varargs: _elseif nodes collect into self.elseifs, a single
-- _else becomes self.elsestmt, and all other statements form the 'then' body
-- stored at integer indexes.
function _if:init(cond,...)
	local elseifs = table()
	local elsestmt, laststmt
	for i=1,select('#', ...) do
		local stmt = select(i, ...)
		if ast._elseif:isa(stmt) then
			elseifs:insert(stmt)
		elseif ast._else:isa(stmt) then
			assert(not elsestmt)	-- only one else clause allowed
			elsestmt = stmt -- and remove
		else
			-- plain body statements must precede any elseif/else clause
			if laststmt then
				assert(laststmt.type ~= 'elseif' and laststmt.type ~= 'else', "got a bad stmt in an if after an else: "..laststmt.type)
			end
			table.insert(self, stmt)
		end
		laststmt = stmt
	end
	self.cond = cond
	self.elseifs = elseifs
	self.elsestmt = elsestmt
end
function _if:serialize(consume)
	consume'if'
	consume(self.cond)
	consume'then'
	spacesep(self, consume)
	for _,ei in ipairs(self.elseifs) do
		consume(ei)
	end
	if self.elsestmt then
		consume(self.elsestmt)
	end
	consume'end'
end
494 |
-- aux for _if: elseif <cond> then <body>
local _elseif = nodeclass('elseif', _stmt)
-- TODO just make self[1] into the cond ...
function _elseif:init(cond,...)
	self.cond = cond
	local stmtCount = select('#', ...)
	for stmtIndex = 1, stmtCount do
		self[stmtIndex] = select(stmtIndex, ...)
	end
end
function _elseif:serialize(consume)
	consume('elseif')
	consume(self.cond)
	consume('then')
	spacesep(self, consume)
end
510 |
-- aux for _if: else <body>
local _else = nodeclass('else', _stmt)
function _else:init(...)
	local stmtCount = select('#', ...)
	for stmtIndex = 1, stmtCount do
		self[stmtIndex] = select(stmtIndex, ...)
	end
end
function _else:serialize(consume)
	consume('else')
	spacesep(self, consume)
end
522 |
-- numeric for loop: for var = min, max [, step] do <body> end
-- step is optional (nil means the implicit step of 1).
-- Design note on fields vs indexes: keeping var/min/max/step as named fields
-- avoids a possibly-nil child mid-sequence in the integer part.
local _foreq = nodeclass('foreq', _stmt)
function _foreq:init(var,min,max,step,...)
	self.var = var
	self.min = min
	self.max = max
	self.step = step
	local stmtCount = select('#', ...)
	for stmtIndex = 1, stmtCount do
		self[stmtIndex] = select(stmtIndex, ...)
	end
end
function _foreq:serialize(consume)
	consume('for')
	consume(self.var)
	consume('=')
	consume(self.min)
	consume(',')
	consume(self.max)
	if self.step then
		consume(',')
		consume(self.step)
	end
	consume('do')
	spacesep(self, consume)
	consume('end')
end
555 |
-- generic for loop: for <vars> in <iterexprs> do <body> end
-- TODO 'vars' should be a node itself
local _forin = nodeclass('forin', _stmt)
function _forin:init(vars, iterexprs, ...)
	self.vars = vars
	self.iterexprs = iterexprs
	local stmtCount = select('#', ...)
	for stmtIndex = 1, stmtCount do
		self[stmtIndex] = select(stmtIndex, ...)
	end
end
function _forin:serialize(consume)
	consume('for')
	commasep(self.vars, consume)
	consume('in')
	commasep(self.iterexprs, consume)
	consume('do')
	spacesep(self, consume)
	consume('end')
end
574 |
-- function declaration/expression; name is optional (nil = anonymous).
-- TODO make 'args' a node
local _function = nodeclass('function', _stmt)
function _function:init(name, args, ...)
	-- annotate each parameter with its position and mark it as a parameter
	-- (note: mutates the caller-supplied args table in place)
	for argIndex = 1, #args do
		local a = args[argIndex]
		a.index = argIndex
		a.param = true
	end
	self.name = name
	self.args = args
	local stmtCount = select('#', ...)
	for stmtIndex = 1, stmtCount do
		self[stmtIndex] = select(stmtIndex, ...)
	end
end
function _function:serialize(consume)
	consume('function')
	if self.name then
		consume(self.name)
	end
	consume('(')
	commasep(self.args, consume)
	consume(')')
	spacesep(self, consume)
	consume('end')
end
601 |
-- aux for _function
-- not used by parser - externally used only (placeholder for flatten etc.)
local _arg = nodeclass'arg'
-- TODO just self[1] ?
function _arg:init(index)
	self.index = index
end
-- serializes as a positional placeholder name, e.g. 'arg1', 'arg2', ...
-- (params need to know their position within the owning function)
function _arg:serialize(consume)
	consume('arg'..self.index)
end
614 |
-- local declaration statement. exprs holds exactly one of:
--   1) a single _function (local function f() ... end)
--   2) a single _assign   (local a, b = 1, 2)
--   3) a plain list of variables (local a, b -- declaration only)
local _local = nodeclass('local', _stmt)
-- TODO just self[1] instead of self.exprs[i]
function _local:init(exprs)
	if ast._function:isa(exprs[1]) or ast._assign:isa(exprs[1]) then
		assert(#exprs == 1, "local functions or local assignments must be the only child")
	end
	self.exprs = table(assert(exprs))
end
function _local:serialize(consume)
	consume('local')
	local head = self.exprs[1]
	if ast._function:isa(head) or ast._assign:isa(head) then
		consume(head)
	else
		commasep(self.exprs, consume)
	end
end
639 |
-- control

-- return statement with zero or more result expressions
local _return = nodeclass('return', _stmt)
-- TODO either 'exprs' a node of its own, or flatten it into 'return'
function _return:init(...)
	self.exprs = {...}
end
function _return:serialize(consume)
	consume('return')
	commasep(self.exprs, consume)
end
651 |
-- break statement
local _break = nodeclass('break', _stmt)
function _break:serialize(consume)
	consume('break')
end
654 |
-- function call expression: func(args...)
-- TODO 'args' a node of its own ? or store it in self[i] ?
local _call = nodeclass'call'
function _call:init(func, ...)
	self.func = func
	self.args = {...}
end
function _call:serialize(consume)
	local args = self.args
	-- a single table or string literal argument may omit the parentheses
	if #args == 1
	and (ast._table:isa(args[1]) or ast._string:isa(args[1]))
	then
		consume(self.func)
		consume(args[1])
	else
		consume(self.func)
		consume('(')
		commasep(args, consume)
		consume(')')
	end
end
675 |
-- nil literal
local _nil = nodeclass'nil'
_nil.const = true
function _nil:serialize(consume)
	consume('nil')
end

-- abstract base for the boolean literal classes
local _boolean = nodeclass'boolean'

-- true literal
local _true = nodeclass('true', _boolean)
_true.const = true
_true.value = true
function _true:serialize(consume)
	consume('true')
end

-- false literal
local _false = nodeclass('false', _boolean)
_false.const = true
_false.value = false
function _false:serialize(consume)
	consume('false')
end
691 |
-- numeric literal; the parsed value lives in self.value
-- (TODO self[1] vs .value: .value kept for convention with _boolean)
local _number = nodeclass'number'
function _number:init(value)
	self.value = value
end
function _number:serialize(consume)
	consume(tostring(self.value))
end

-- string literal; self.value holds the raw (unescaped) string
-- TODO just self[1] instead of self.value
local _string = nodeclass'string'
function _string:init(value)
	self.value = value
end
function _string:serialize(consume)
	-- use ext.tolua's string serializer to pick quoting/escaping
	consume(tolua(self.value))
end
706 |
-- '...' vararg expression
local _vararg = nodeclass'vararg'
function _vararg:serialize(consume)
	consume('...')
end

-- table constructor; entries live at integer indexes, and key/value pairs
-- are represented as single-element _assign nodes
-- TODO 'args' a node, or flatten into self[i] ?
local _table = nodeclass'table'
function _table:init(...)
	local entryCount = select('#', ...)
	for entryIndex = 1, entryCount do
		self[entryIndex] = select(entryIndex, ...)
	end
end
function _table:serialize(consume)
	consume('{')
	local count = #self
	for i, entry in ipairs(self) do
		if ast._assign:isa(entry) then
			-- key/value entry: exactly one var (the key) and one expr (the value)
			assert.len(entry.vars, 1)
			assert.len(entry.exprs, 1)
			-- emit shorthand `name=` when the key is a plain (non-keyword) name,
			-- otherwise the bracketed `[key]=` form
			if ast.keyIsName(entry.vars[1], self.parser) then
				consume(entry.vars[1].value)
			else
				consume('[')
				consume(entry.vars[1])
				consume(']')
			end
			consume('=')
			consume(entry.exprs[1])
		else
			consume(entry)
		end
		if i < count then
			consume(',')
		end
	end
	consume('}')
end
745 |
-- OK here is the classic example of the benefits of fields over integers:
-- extensibility. attrib was added later; with integer indexes, adding or
-- removing children means reordering indexes and breaking compat.
-- variable reference, lhs of ast._assign's.
-- attrib is the 5.4+ <const>/<close> attribute, emitted after the name.
local _var = nodeclass'var'
function _var:init(name, attrib)
	self.name = name
	self.attrib = attrib
end
function _var:serialize(consume)
	consume(self.name)
	if self.attrib then
		-- consume() inserts any needed spacing between adjacent name tokens
		consume('<')
		consume(self.attrib)
		consume('>')
	end
end
766 |
-- parenthesized expression (fixes precedence / truncates multiple returns)
local _par = nodeclass'par'
ast._par = _par
ast._parenthesis = nil
function _par:init(expr)
	self.expr = expr
end
function _par:serialize(consume)
	consume('(')
	consume(self.expr)
	consume(')')
end
778 |
-- indexing expression: expr[key] (or expr.key when the key allows it)
local _index = nodeclass'index'
function _index:init(expr,key)
	self.expr = expr
	-- convenience: wrap raw Lua strings/numbers in their literal node classes
	-- TODO or not?
	local keyType = type(key)
	if keyType == 'string' then
		key = ast._string(key)
	elseif keyType == 'number' then
		key = ast._number(key)
	end
	self.key = key
end
function _index:serialize(consume)
	if ast.keyIsName(self.key, self.parser) then
		-- then use a .$key instead of [$key]
		consume(self.expr)
		consume('.')
		consume(self.key.value)
	else
		consume(self.expr)
		consume('[')
		consume(self.key)
		consume(']')
	end
end
804 |
-- this isn't the () call itself, this is just the : dereference
-- a:b(c) is _call(_indexself(_var'a', _var'b'), _var'c')
-- technically a string lookup, but it's only valid as a Lua name,
-- so the key is stored as a plain Lua string
local _indexself = nodeclass'indexself'
function _indexself:init(expr,key)
	self.expr = assert(expr)
	assert(isLuaName(key))
	-- TODO compat with _index? always wrap? do this before passing in key?
	--key = ast._string(key)
	self.key = assert(key)
end
function _indexself:serialize(consume)
	consume(self.expr)
	consume(':')
	consume(self.key)
end
821 |
-- base class for operator nodes; operands live at integer indexes and each
-- subclass provides its .op symbol as a class member
local _op = nodeclass'op'
function _op:init(...)
	local operandCount = select('#', ...)
	for operandIndex = 1, operandCount do
		self[operandIndex] = select(operandIndex, ...)
	end
end
function _op:serialize(consume)
	local count = #self
	for i, operand in ipairs(self) do
		consume(operand)
		if i < count then
			consume(self.op)
		end
	end
end
835 |
-- Binary operator node classes, all subclasses of _op.
-- Each class just carries its .op symbol; init/serialize are inherited.
for _,pair in ipairs{
	{'add','+'},
	{'sub','-'},
	{'mul','*'},
	{'div','/'},
	{'pow','^'},
	{'mod','%'},
	{'concat','..'},
	{'lt','<'},
	{'le','<='},
	{'gt','>'},
	{'ge','>='},
	{'eq','=='},
	{'ne','~='},
	{'and','and'},
	{'or','or'},
	{'idiv', '//'}, -- 5.3+
	{'band', '&'}, -- 5.3+
	{'bxor', '~'}, -- 5.3+
	{'bor', '|'}, -- 5.3+
	{'shl', '<<'}, -- 5.3+
	{'shr', '>>'}, -- 5.3+
} do
	local className, symbol = pair[1], pair[2]
	nodeclass(className, _op).op = symbol
end
863 |
-- Unary operator node classes: prefix form, single operand in self[1].
for _,pair in ipairs{
	{'unm','-'},
	{'not','not'},
	{'len','#'},
	{'bnot','~'}, -- 5.3+
} do
	local symbol = pair[2]
	local cl = nodeclass(pair[1], _op)
	cl.op = symbol
	function cl:init(...)
		local operandCount = select('#', ...)
		for operandIndex = 1, operandCount do
			self[operandIndex] = select(operandIndex, ...)
		end
	end
	function cl:serialize(consume)
		consume(self.op)
		consume(self[1]) -- spaces required for 'not'
	end
end
883 |
-- goto statement: jumps to a matching ::label::
local _goto = nodeclass('goto', _stmt)
function _goto:init(name)
	self.name = name
end
function _goto:serialize(consume)
	consume('goto')
	consume(self.name)
end

-- label statement: the ::name:: target of a goto
local _label = nodeclass('label', _stmt)
function _label:init(name)
	self.name = name
end
function _label:serialize(consume)
	consume('::')
	consume(self.name)
	consume('::')
end
902 |
903 | return ast
904 |
--------------------------------------------------------------------------------
/lua/parser.lua:
--------------------------------------------------------------------------------
1 | local table = require 'ext.table'
2 | local assert = require 'ext.assert'
3 | local Parser = require 'parser.base.parser'
4 |
5 | local ast = require 'parser.lua.ast'
6 |
7 | local LuaTokenizer = require 'parser.lua.tokenizer'
8 |
9 | local LuaParser = Parser:subclass()
10 |
11 | -- save the namespace here, for Parser:setData()
12 | LuaParser.ast = ast
13 |
-- static function
-- Convenience one-shot entry point: parse `data` (chunk name `source`) and
-- return the AST tree, or forward setData's (false, errmsg) on failure.
function LuaParser.parse(data, source, ...)
	local parser = LuaParser(nil, nil, ...)
	local outcome = table.pack(parser:setData(data, source))
	if not outcome[1] then
		return outcome:unpack()
	end
	return parser.tree
end
21 |
-- TODO instead of version and useluajit, how about parseFlags, and enable/disable them depending on the version
-- Construct a parser.
--   data      = optional source text to parse immediately (asserts on failure)
--   version   = Lua language version string ('5.1'..'5.4'); defaults to the running interpreter's _VERSION
--   source    = chunk name used for error reporting
--   useluajit = enable LuaJIT extensions (LL/ULL suffixes); defaults to _G.jit detection
function LuaParser:init(data, version, source, useluajit)
	self.version = version or _VERSION:match'^Lua (.*)$'
	if useluajit == nil then
		-- I could test for _G.jit's presence, but what if luajit is compiled with jit off but still has LL language feature on ...
		-- TODO unified load shim layer , esp for lua 5.1 ...
		-- TODO TODO if langfix's load has been replaced then this will segfault...
		-- we are detecting LL / ULL suffix, but using load to do so causes some recursion problems (since in some cases I've already overridden load() via ext.load and parser.load_xform ...)
		--local _load = loadstring or load
		--useluajit = _load'return 1LL'
		-- ... so instead, for now just assume jit's presence implies luajit implies LL / ULL for parsing
		useluajit = not not _G.jit
	end
	self.useluajit = not not useluajit

	-- TODO between this and parser.grammar, make a table-based way to specify the rules
	-- TODO TODO a token DAG from the grammar would be nice ...
	-- [[ what to name this ...
	-- Expression precedence table, listed lowest to highest.
	-- NOTE: the version comparisons below are lexicographic string compares,
	-- which are correct for single-digit '5.x' version strings.
	self.parseExprPrecedenceRulesAndClassNames = table{
		{
			name = 'or',
			rules = {
				{token='or', className='_or'},
			},
		},
		{
			name = 'and',
			rules = {
				{token='and', className='_and'},
			},
		},
		{
			name = 'cmp',
			rules = {
				{token='<', className='_lt'},
				{token='>', className='_gt'},
				{token='<=', className='_le'},
				{token='>=', className='_ge'},
				{token='~=', className='_ne'},
				{token='==', className='_eq'},
			},
		},
	}:append(
		-- bitwise operator levels only exist for Lua 5.3+
		self.version < '5.3' and nil or table{
		{
			name = 'bor',
			rules = {
				{token='|', className='_bor'},
			},
		},
		{
			name = 'bxor',
			rules = {
				{token='~', className='_bxor'},
			},
		},
		{
			name = 'band',
			rules = {
				{token='&', className='_band'},
			},
		},
		{
			name = 'shift',
			rules = {
				{token='<<', className='_shl'},
				{token='>>', className='_shr'},
			},
		},
	}):append{
		{
			name = 'concat',
			rules = {
				{token='..', className='_concat'},
			},
		},
		{
			name = 'addsub', -- arithmetic
			rules = {
				{token='+', className='_add'},
				{token='-', className='_sub'},
			},
		},
		{
			name = 'muldivmod', -- geometric
			rules = {
				{token='*', className='_mul'},
				{token='/', className='_div'},
				{token='%', className='_mod'},
				-- if version < 5.3 then the // symbol won't be added to the tokenizer anyways...
				{token='//', className='_idiv'},
			},
		},
		{
			name = 'unary',
			unaryLHS = true,
			rules = {
				{token='not', className='_not'},
				{token='#', className='_len'}, -- only a 5.1+ token
				{token='-', className='_unm'},
				{token='~', className='_bnot'}, -- only a 5.3+ token
			},
		},
		{
			name = 'pow',
			rules = {
				{token='^', className='_pow', nextLevel='unary'},
			},
		},
	}
	--]]

	if data then
		-- can't return from init so gotta error ...
		assert(self:setData(data, source))
	end
end
139 |
-- Reset per-parse state, hand `data` to the base parser, then validate that
-- every goto has a visible label.  Returns true on success, or false plus an
-- error message.
function LuaParser:setData(data, source)
	self.gotos = {} -- keep track of all gotos
	self.labels = {} -- keep track of all labels
	self.blockStack = table()
	self.functionStack = table{'function-vararg'}

	-- NOTE(review): `source` is accepted but not forwarded to the base setData
	-- -- confirm whether the base class needs it for error reporting
	local result = table.pack(LuaParser.super.setData(self, data))
	if not result[1] then
		return result:unpack()
	end

	-- last verify that all gotos went to all labels
	for _,g in pairs(self.gotos) do
		if not self.labels[g.name] then
			-- NOTE(review): message ends with a trailing "for " -- looks truncated; intended?
			return false, "line "..g.span.to.line..": no visible label '"..g.name.."' for "
		end
	end
	return true
end
159 |
-- Construct the tokenizer for this parser's Lua version / LuaJIT setting.
function LuaParser:buildTokenizer(data)
	return LuaTokenizer(data, self.version, self.useluajit)
end

-- default entry point for parsing data sources
-- (the whole file is one chunk)
function LuaParser:parseTree()
	return self:parse_chunk()
end
168 |
-- Parse a chunk: a run of statements (each optionally followed by ';'),
-- then an optional return statement.  Returns a _block node spanning them.
function LuaParser:parse_chunk()
	local from = self:getloc()
	local stmts = table()
	if self.version >= '5.2' or self.useluajit then
		-- 5.2+/LuaJIT: preceding ;'s allowed
		while self:canbe(';', 'symbol') do end
	end
	while true do
		local stmt = self:parse_stat()
		if not stmt then break end
		stmts:insert(stmt)
		self:canbe(';', 'symbol')
	end
	local laststat = self:parse_retstat()
	if laststat then
		stmts:insert(laststat)
		self:canbe(';', 'symbol')
	end
	return self:node('_block', table.unpack(stmts))
		:setspan{from = from, to = self:getloc()}
end
190 |
-- Parse a chunk while tracking the enclosing construct on blockStack
-- (blockName is e.g. 'for =' / 'for in'; nil means no tracking).
function LuaParser:parse_block(blockName)
	if blockName then
		self.blockStack:insert(blockName)
	end
	local chunk = self:parse_chunk()
	if blockName then
		assert.eq(self.blockStack:remove(), blockName)
	end
	return chunk
end
197 |
-- Parse a single statement and return its AST node, or nil when the next
-- tokens do not start a statement.  Handles all keyword-introduced statements
-- first, then falls through to the functioncall / varlist-assignment rules.
function LuaParser:parse_stat()
	local from = self:getloc()
	if self:canbe('local', 'keyword') then
		local ffrom = self:getloc()
		if self:canbe('function', 'keyword') then
			-- 'local' 'function' Name funcbody
			local namevar = self:parse_var()
			if not namevar then error{msg="expected name"} end
			return self:node('_local', {
				self:makeFunction(
					namevar,
					table.unpack((assert(self:parse_funcbody(), {msg="expected function body"})))
				):setspan{from = ffrom , to = self:getloc()}
			}):setspan{from = from , to = self:getloc()}
		else
			-- 'local' attnamelist ['=' explist]
			local afrom = self:getloc()	-- NOTE(review): unused; the spans below use ffrom -- possibly afrom was intended there; confirm
			local namelist = assert(self:parse_attnamelist(), {msg="expected attr name list"})
			if self:canbe('=', 'symbol') then
				local explist = assert(self:parse_explist(), {msg="expected expr list"})
				local assign = self:node('_assign', namelist, explist)
					:setspan{from = ffrom, to = self:getloc()}
				return self:node('_local', {assign})
					:setspan{from = from, to = self:getloc()}
			else
				-- declaration without initializer
				return self:node('_local', namelist)
					:setspan{from = from, to = self:getloc()}
			end
		end
	elseif self:canbe('function', 'keyword') then
		-- 'function' funcname funcbody
		local funcname = self:parse_funcname()
		return self:makeFunction(funcname, table.unpack((assert(self:parse_funcbody(), {msg="expected function body"}))))
			:setspan{from = from , to = self:getloc()}
	elseif self:canbe('for', 'keyword') then
		local namelist = assert(self:parse_namelist(), {msg="expected name list"})
		if self:canbe('=', 'symbol') then
			-- numeric for: 'for' Name '=' exp ',' exp [',' exp] 'do' block 'end'
			assert.eq(#namelist, 1, {msg="expected only one name in for loop"})
			local explist = assert(self:parse_explist(), {msg="expected exp list"})
			assert.ge(#explist, 2, {msg="bad for loop"})
			assert.le(#explist, 3, {msg="bad for loop"})
			local doloc = self:getloc()
			self:mustbe('do', 'keyword')
			local block = assert(self:parse_block'for =', {msg="for loop expected block"})
			self:mustbe('end', 'keyword', 'do', doloc)
			-- explist[3] (the step) is nil when omitted
			-- NOTE(review): that puts a nil mid-arglist -- assuming the node ctor tolerates it; confirm
			return self:node('_foreq', namelist[1], explist[1], explist[2], explist[3], table.unpack(block))
				:setspan{from = from, to = self:getloc()}
		elseif self:canbe('in', 'keyword') then
			-- generic for: 'for' namelist 'in' explist 'do' block 'end'
			local explist = assert(self:parse_explist(), {msg="expected expr list"})
			local doloc = self:getloc()
			self:mustbe('do', 'keyword')
			local block = assert(self:parse_block'for in', {msg="expected block"})
			self:mustbe('end', 'keyword', 'do', doloc)
			return self:node('_forin', namelist, explist, table.unpack(block))
				:setspan{from = from, to = self:getloc()}
		else
			error{msg="'=' or 'in' expected"}
		end
	elseif self:canbe('if', 'keyword') then
		local cond = assert(self:parse_exp(), {msg="unexpected symbol"})
		self:mustbe('then', 'keyword')
		local block = self:parse_block()
		local stmts = table(block)
		-- the _if node holds the then-block statements, followed by any
		-- _elseif / _else child nodes appended below
		local efrom = self:getloc()
		while self:canbe('elseif', 'keyword') do
			local cond = assert(self:parse_exp(), {msg='unexpected symbol'})
			self:mustbe('then', 'keyword')
			stmts:insert(
				self:node('_elseif', cond, table.unpack((assert(self:parse_block(), {msg='expected block'}))))
					:setspan{from = efrom, to = self:getloc()}
			)
			efrom = self:getloc()
		end
		if self:canbe('else', 'keyword') then
			stmts:insert(
				self:node('_else', table.unpack((assert(self:parse_block(), {msg='expected block'}))))
					:setspan{from = efrom, to = self:getloc()}
			)
		end
		self:mustbe('end', 'keyword', 'if', from)
		return self:node('_if', cond, table.unpack(stmts))
			:setspan{from = from, to = self:getloc()}
	elseif self:canbe('repeat', 'keyword') then
		local block = assert(self:parse_block'repeat', {msg='expected block'})
		self:mustbe('until', 'keyword')
		return self:node(
			'_repeat',
			(assert(self:parse_exp(), {msg='unexpected symbol'})),
			table.unpack(block)
		):setspan{from = from, to = self:getloc()}
	elseif self:canbe('while', 'keyword') then
		local cond = assert(self:parse_exp(), {msg='unexpected symbol'})
		local doloc = self:getloc()
		self:mustbe('do', 'keyword')
		local block = assert(self:parse_block'while', {msg='expected block'})
		self:mustbe('end', 'keyword', 'do', doloc)
		return self:node('_while', cond, table.unpack(block))
			:setspan{from = from, to = self:getloc()}
	elseif self:canbe('do', 'keyword') then
		local block = assert(self:parse_block(), {msg='expected block'})
		self:mustbe('end', 'keyword', 'do', from)
		return self:node('_do', table.unpack(block))
			:setspan{from = from, to = self:getloc()}
	elseif self.version >= '5.2' then
		-- NOTE(review): the tokenizer always treats 'goto' as a keyword (even for
		-- 5.1/LuaJIT), but this branch only runs for version >= '5.2' -- so a
		-- LuaJIT-targeted 5.1 parse would not accept goto statements; confirm intent.
		if self:canbe('goto', 'keyword') then
			local name = self:mustbe(nil, 'name')
			local g = self:node('_goto', name)
				:setspan{from = from, to = self:getloc()}
			self.gotos[name] = g	-- recorded for later goto/label resolution
			return g
		-- lua5.2+ break is a statement, so you can have multiple breaks in a row with no syntax error
		elseif self:canbe('break', 'keyword') then
			return self:parse_break()
				:setspan{from = from, to = self:getloc()}
		elseif self:canbe('::', 'symbol') then
			-- label ::= '::' Name '::'
			local name = self:mustbe(nil, 'name')
			local l = self:node('_label', name)
			self.labels[name] = true
			self:mustbe('::', 'symbol')
			return l:setspan{from = from, to = self:getloc()}
		end
	end

	-- now we handle functioncall and varlist = explist rules

	--[[
	stat ::= varlist `=` explist | functioncall
	varlist ::= var {`,` var}
	var ::= Name | prefixexp `[` exp `]` | prefixexp `.` Name
	prefixexp ::= var | functioncall | `(` exp `)`
	functioncall ::= prefixexp args | prefixexp `:` Name args
	right now prefixexp is designed to process trailing args ...
	... so just use it and complain if the wrapping ast is not a _call
	likewise with var, complain if it is a call
	--]]

	local prefixexp = self:parse_prefixexp()
	if prefixexp then
		if self.ast._call:isa(prefixexp) then -- function call
			return prefixexp
		else -- varlist assignment
			local vars = table{prefixexp}
			while self:canbe(',', 'symbol') do
				local var = assert(self:parse_prefixexp(), {msg='expected expr'})
				assert.ne(var.type, 'call', {msg="syntax error"})
				vars:insert(var)
			end
			return self:parse_assign(vars, from)
		end
	end
end
347 |
-- Finish an assignment statement: consume '=' and the value list.
-- 'vars' is the already-parsed lvalue list; 'from' is the statement start loc.
function LuaParser:parse_assign(vars, from)
	self:mustbe('=', 'symbol')
	local explist = assert(self:parse_explist(), {msg='expected expr'})
	local assign = self:node('_assign', vars, explist)
	return assign:setspan{from = from, to = self:getloc()}
end
353 |
354 | -- 'laststat' in 5.1, 'retstat' in 5.2+
function LuaParser:parse_retstat()
	local from = self:getloc()
	-- in 5.1 and earlier 'break' is a block-terminating laststat;
	-- in 5.2+ it is an ordinary statement handled by parse_stat instead
	if self.version <= '5.1' then
		if self:canbe('break', 'keyword') then
			local brk = self:parse_break()
			return brk:setspan{from = from, to = self:getloc()}
		end
	end
	if not self:canbe('return', 'keyword') then return end
	local exps = self:parse_explist() or {}
	local ret = self:node('_return', table.unpack(exps))
	return ret:setspan{from = from, to = self:getloc()}
end
369 |
370 | -- verify we're in a loop, then return the break
371 |
function LuaParser:parse_break()
	local from = self:getloc()
	-- 'break' is only valid when the innermost named block is a loop
	local enclosing = self.blockStack:last()
	local insideLoop = enclosing == 'while'
		or enclosing == 'repeat'
		or enclosing == 'for ='
		or enclosing == 'for in'
	if not insideLoop then
		error{msg="break not inside loop"}
	end
	return self:node('_break')
		:setspan{from = from, to = self:getloc()}
end
380 |
381 |
-- funcname ::= Name {'.' Name} [':' Name]
-- Builds nested _index nodes for dotted names and a final _indexself for a
-- trailing method name.  Returns nil when no leading Name is present.
function LuaParser:parse_funcname()
	local from = self:getloc()
	local name = self:parse_var()
	if not name then return end
	while self:canbe('.', 'symbol') do
		-- use self:getloc() like the identical _index/_string construction in
		-- parse_prefixexp (previously this called self.t:getloc() directly,
		-- bypassing the parser's wrapper)
		local sfrom = self:getloc()
		name = self:node('_index',
			name,
			self:node('_string', self:mustbe(nil, 'name'))
				:setspan{from = sfrom, to = self:getloc()}
		):setspan{from = from, to = self:getloc()}
	end
	if self:canbe(':', 'symbol') then
		name = self:node('_indexself', name, self:mustbe(nil, 'name'))
			:setspan{from = from, to = self:getloc()}
	end
	return name
end
400 |
-- parses a variable name, without attribs, and returns it in a '_var' node
function LuaParser:parse_var()
	local from = self:getloc()
	local name = self:canbe(nil, 'name')
	if name == nil then return nil end
	local var = self:node('_var', name)
	return var:setspan{from=from, to=self:getloc()}
end
409 |
-- namelist ::= Name {',' Name}
-- Returns a list of _var nodes, or nil if no name is present.
function LuaParser:parse_namelist()
	local first = self:parse_var()
	if not first then return end
	local vars = table{first}
	while self:canbe(',', 'symbol') do
		local nextVar = assert(self:parse_var(), {msg="expected name"})
		vars:insert(nextVar)
	end
	return vars
end
419 |
420 | -- same as above but with optional attributes
421 |
-- Like parse_namelist, but each name may carry an optional 5.4 attribute
-- (e.g. <const> / <close>), stored on the _var node.
function LuaParser:parse_attnamelist()
	local from = self:getloc()
	local name = self:canbe(nil, 'name')
	if not name then return end

	local names = table()
	repeat
		local attrib = self:parse_attrib()
		names:insert(
			self:node('_var', name, attrib)
				:setspan{from = from, to = self:getloc()}
		)
		if not self:canbe(',', 'symbol') then break end
		from = self:getloc()
		name = self:mustbe(nil, 'name')
	until false
	return names
end
442 |
-- attrib ::= ['<' Name '>']  -- a 5.4-only feature.
-- Returns the attribute name, or nil when absent / pre-5.4.
function LuaParser:parse_attrib()
	if self.version < '5.4' then return end
	if not self:canbe('<', 'symbol') then return nil end
	local attrib = self:mustbe(nil, 'name')
	self:mustbe('>', 'symbol')
	return attrib
end
452 |
-- explist ::= exp {',' exp}
-- Returns a list of expression nodes, or nil if no expression is present.
function LuaParser:parse_explist()
	local first = self:parse_exp()
	if not first then return end
	local list = table{first}
	while self:canbe(',', 'symbol') do
		local e = assert(self:parse_exp(), {msg='unexpected symbol'})
		list:insert(e)
	end
	return list
end
462 |
463 | --[[
464 | exp ::= nil | false | true | Numeral | LiteralString | `...` | function | prefixexp | tableconstructor | exp binop exp | unop exp
465 | ... splitting this into two ...
466 | exp ::= [unop] subexp {binop [unop] subexp}
467 | subexp ::= nil | false | true | Numeral | LiteralString | `...` | function | prefixexp | tableconstructor
468 | --]]
469 |
-- exp ::= [unop] subexp {binop [unop] subexp}
-- Precedence is handled by the table-driven climber in the base parser.
function LuaParser:parse_exp()
	return self:parse_expr_precedenceTable(1)	-- 1 = lowest precedence level
end
473 |
-- subexp ::= nil | false | true | Numeral | LiteralString | '...' | functiondef | prefixexp | tableconstructor
-- Tries each alternative in order; each parse_* only consumes tokens on a match.
function LuaParser:parse_subexp()
	local tableconstructor = self:parse_tableconstructor()
	if tableconstructor then return tableconstructor end

	local prefixexp = self:parse_prefixexp()
	if prefixexp then return prefixexp end

	local functiondef = self:parse_functiondef()
	if functiondef then return functiondef end

	local from = self:getloc()
	if self:canbe('...', 'symbol') then
		if self.version == '5.0' then error{msg="unexpected symbol near '...'"} end
		-- '...' is only valid directly inside a vararg function (see parse_funcbody)
		assert.eq(self.functionStack:last(), 'function-vararg', {msg='unexpected symbol'})
		return self:node('_vararg')
			:setspan{from = from, to = self:getloc()}
	end
	-- canbe(nil, <type>) matches any token of that type; its text is then in self.lasttoken
	if self:canbe(nil, 'string') then
		return self:node('_string', self.lasttoken)
			:setspan{from = from, to = self:getloc()}
	end
	if self:canbe(nil, 'number') then
		return self:node('_number', self.lasttoken)
			:setspan{from = from, to = self:getloc()}
	end
	if self:canbe('true', 'keyword') then
		return self:node('_true')
			:setspan{from = from, to = self:getloc()}
	end
	if self:canbe('false', 'keyword') then
		return self:node('_false')
			:setspan{from = from, to = self:getloc()}
	end
	if self:canbe('nil', 'keyword') then
		return self:node('_nil')
			:setspan{from = from, to = self:getloc()}
	end
	-- falls through with nil when nothing matched
end
512 |
513 | --[[
514 | prefixexp ::= var | functioncall | `(` exp `)`
515 |
516 | functioncall ::= prefixexp args | prefixexp `:` Name args
517 | combine...
518 | prefixexp ::= var | prefixexp args | prefixexp `:` Name args | `(` exp `)`
519 | var ::= Name | prefixexp `[` exp `]` | prefixexp `.` Name
520 | combine ...
521 | prefixexp ::= Name | prefixexp `[` exp `]` | prefixexp `.` Name | prefixexp args | prefixexp `:` Name args | `(` exp `)`
522 | simplify ...
523 | prefixexp ::= (Name {'[' exp ']' | `.` Name | [`:` Name] args} | `(` exp `)`) {args}
524 | --]]
525 |
-- prefixexp ::= (Name {'[' exp ']' | '.' Name | [':' Name] args} | '(' exp ')') {args}
-- Returns a _var / _index / _indexself / _call / _par node, or nil on no match.
function LuaParser:parse_prefixexp()
	local prefixexp
	local from = self:getloc()

	if self:canbe('(', 'symbol') then
		-- a parenthesized expression is kept as a distinct _par node
		-- so the parentheses survive in the AST
		local exp = assert(self:parse_exp(), {msg='unexpected symbol'})
		self:mustbe(')', 'symbol')
		prefixexp = self:node('_par', exp)
			:setspan{from = from, to = self:getloc()}
	else
		prefixexp = self:parse_var()
		if not prefixexp then return end
	end

	-- wrap with suffixes (indexing, method calls, call args) left-to-right
	while true do
		if self:canbe('[', 'symbol') then
			prefixexp = self:node('_index', prefixexp, (assert(self:parse_exp(), {msg='unexpected symbol'})))
			self:mustbe(']', 'symbol')
			prefixexp:setspan{from = from, to = self:getloc()}
		elseif self:canbe('.', 'symbol') then
			local sfrom = self:getloc()
			-- a.b is represented like a['b']: the name becomes a _string key
			prefixexp = self:node('_index',
				prefixexp,
				self:node('_string', self:mustbe(nil, 'name'))
					:setspan{from = sfrom, to = self:getloc()}
			)
			:setspan{from = from, to = self:getloc()}
		elseif self:canbe(':', 'symbol') then
			-- method call: ':' Name must be immediately followed by args
			prefixexp = self:node('_indexself',
				prefixexp,
				self:mustbe(nil, 'name')
			):setspan{from = from, to = self:getloc()}
			local args = self:parse_args()
			if not args then error{msg="function arguments expected"} end
			prefixexp = self:node('_call', prefixexp, table.unpack(args))
				:setspan{from = from, to = self:getloc()}
		else
			-- plain call args (parens, table ctor, or string literal), if any
			local args = self:parse_args()
			if not args then break end

			prefixexp = self:node('_call', prefixexp, table.unpack(args))
				:setspan{from = from, to = self:getloc()}
		end
	end

	return prefixexp
end
573 |
574 | -- returns nil on fail to match, like all functions
575 | -- produces error on syntax error
576 | -- returns a table of the args -- particularly an empty table if no args were found
577 |
-- args ::= '(' [explist] ')' | tableconstructor | LiteralString
-- Returns a (possibly empty) list of argument expressions, or nil when the
-- upcoming tokens don't form an argument list.
function LuaParser:parse_args()
	local from = self:getloc()

	-- string-literal argument: f"..."
	if self:canbe(nil, 'string') then
		local strnode = self:node('_string', self.lasttoken)
			:setspan{from = from, to = self:getloc()}
		return {strnode}
	end

	-- table-constructor argument: f{...}
	local ctor = self:parse_tableconstructor()
	if ctor then return {ctor} end

	-- parenthesized argument list: f(...)
	if not self:canbe('(', 'symbol') then return nil end
	local explist = self:parse_explist()
	self:mustbe(')', 'symbol')
	return explist or {}
end
-- helper for constructing '_function' nodes (the caller attaches the span)
597 |
-- Build a '_function' AST node from (name-or-nil, arg-list, body-stmts...).
-- Varargs are forwarded as-is.
function LuaParser:makeFunction(...)
	return self:node('_function', ...) -- no :setspan(), this is done by the caller
end
601 | -- 'function' in the 5.1 syntax
602 |
-- functiondef ::= 'function' funcbody   (an anonymous function expression)
function LuaParser:parse_functiondef()
	local from = self:getloc()
	if not self:canbe('function', 'keyword') then return end
	local body = assert(self:parse_funcbody(), {msg='expected function body'})
	local fn = self:makeFunction(nil, table.unpack(body))
	return fn:setspan{from = from, to = self:getloc()}
end
609 | -- returns a table of ... first element is a table of args, rest of elements are the body statements
610 |
-- funcbody ::= '(' [parlist] ')' block 'end'
-- Returns a table whose first element is the parameter list and whose
-- remaining elements are the body statements, or nil when '(' isn't next.
function LuaParser:parse_funcbody()
	local funcloc = self:getloc()
	if not self:canbe('(', 'symbol') then return end
	local args = self:parse_parlist() or table()
	-- a trailing _vararg parameter marks this a vararg function; the marker is
	-- pushed on functionStack so parse_subexp can validate '...' in the body
	local lastArg = args:last()
	local functionType = self.ast._vararg:isa(lastArg) and 'function-vararg' or 'function'
	self:mustbe(')', 'symbol')
	self.functionStack:insert(functionType)
	local block = self:parse_block(functionType)
	assert.eq(self.functionStack:remove(), functionType)
	self:mustbe('end', 'keyword', 'function', funcloc)
	return table{args, table.unpack(block)}
end
624 |
-- parlist ::= namelist [',' '...'] | '...'
-- Like parse_namelist, but '...' may appear as the (terminating) last entry.
-- Returns a list of _var nodes, possibly ending with a _vararg node.
function LuaParser:parse_parlist()
	local from = self:getloc()
	if self:canbe('...', 'symbol') then
		local va = self:node('_vararg')
			:setspan{from = from, to = self:getloc()}
		return table{va}
	end

	local first = self:parse_var()
	if not first then return end
	local params = table{first}
	while self:canbe(',', 'symbol') do
		from = self:getloc()
		if self:canbe('...', 'symbol') then
			-- '...' must come last, so stop here
			params:insert(
				self:node('_vararg')
					:setspan{from = from, to = self:getloc()}
			)
			break
		end
		local nextParam = self:parse_var()
		if not nextParam then error{msg="expected name"} end
		params:insert(nextParam)
	end
	return params
end
652 |
-- tableconstructor ::= '{' [fieldlist] '}'
-- Returns a _table node whose indexed children are the parsed fields.
function LuaParser:parse_tableconstructor()
	local from = self:getloc()
	if not self:canbe('{', 'symbol') then return end
	if self.version == '5.0' then
		-- despite what the 5.0 syntax says, it looks like the 5.0 parser will parse and ignore a leading semicolon as valid: {; 1, 2, 3, 4}
		self:canbe(';', 'symbol')
	end
	local fields = self:parse_fieldlist()
	self:mustbe('}', 'symbol')
	--[[ ok design flaw I didn't foresee when trying to unify all the AST as indexed children (like my symmath project)
	-- if this _table's children are too big then you can't unpack it into the ctor args...
	local result = self:node('_table', table.unpack(fields or {}))
	--]]
	-- [[ ... so instead, manually insert them...
	-- but a later TODO might be to go back to accepting a table-of-children.
	local result = self:node'_table'
	if fields then
		for i,field in ipairs(fields) do
			result[i] = field
		end
	end
	--]]
	result:setspan{from = from, to = self:getloc()}
	return result
end
678 |
-- fieldlist ::= field {fieldsep field} [fieldsep]
-- Returns a list of field nodes, or nil when no field is present.
function LuaParser:parse_fieldlist()
	local field = self:parse_field()
	if not field then return end
	local fields = table{field}
	while self:parse_fieldsep() do
		local field = self:parse_field()
		-- when no field follows, the separator just consumed was the grammar's
		-- optional trailing [fieldsep]
		if not field then break end
		fields:insert(field)
	end
	-- (a previous extra parse_fieldsep() call here consumed a SECOND trailing
	-- separator, incorrectly accepting illegal forms like `{1,,}` -- the loop
	-- above already handles the single optional trailing separator)
	return fields
end
691 |
-- field ::= '[' exp ']' '=' exp | Name '=' exp | exp
-- Keyed fields become _assign nodes ({key} = {value});
-- positional fields are returned as the bare expression node.
function LuaParser:parse_field()
	local from = self:getloc()
	if self:canbe('[', 'symbol') then
		-- '[' exp ']' '=' exp
		local keyexp = assert(self:parse_exp(), {msg='unexpected symbol'})
		self:mustbe(']', 'symbol')
		self:mustbe('=', 'symbol')
		local valexp = self:parse_exp()
		if not valexp then error{msg="expected expression but found "..tostring(self.t.token)} end
		return self:node('_assign', {keyexp}, {valexp})
			:setspan{from = from, to = self:getloc()}
	end

	-- this will be Name or exp
	-- in the case that it is a Name then check for = exp
	local exp = self:parse_exp()
	if not exp then return end

	if self.ast._var:isa(exp) and self:canbe('=', 'symbol') then
		-- Name '=' exp : reinterpret the parsed _var as a _string key
		return self:node('_assign',
			{
				self:node('_string', exp.name):setspan(exp.span)
			}, {
				(assert(self:parse_exp(), {msg='unexpected symbol'}))
			}
		):setspan{from = from, to = self:getloc()}
	else
		return exp
	end
end
721 |
-- fieldsep ::= ',' | ';'
function LuaParser:parse_fieldsep()
	local sep = self:canbe(',', 'symbol')
	if sep then return sep end
	return self:canbe(';', 'symbol')
end
725 |
726 | return LuaParser
727 |
--------------------------------------------------------------------------------
/lua/tokenizer.lua:
--------------------------------------------------------------------------------
1 | local table = require 'ext.table'
2 | local assert = require 'ext.assert'
3 | local Tokenizer = require 'parser.base.tokenizer'
4 |
5 | local LuaTokenizer = Tokenizer:subclass()
6 |
7 | --[[
8 | NOTICE this only needs to be initialized once per tokenizer, not per-data-source
9 | however at the moment it does need to be initialized once-per-version (as the extra arg to Tokenizer)
10 | maybe I should move it to static initialization and move version-based stuff to subclasses' static-init?
11 |
12 | So why 'symbols' vs 'keywords' ?
13 | 'Keywords' consist of valid names (names like variables functions etc use)
14 | while 'symbols' consist of everything else. (can symbols contain letters that names can use? at the moment they do not.)
For this reason, when parsing, keywords need separating spaces, while symbols do not (except for distinguishing between different-sized symbols, e.g. '< <' vs '<<').
16 | --]]
-- Populate self.symbols / self.keywords for the requested Lua version.
function LuaTokenizer:initSymbolsAndKeywords(version, useluajit)
	-- stored for later use in parseHexNumber / parseDecNumber
	self.version = assert(version)
	self.useluajit = useluajit

	-- longer symbols are listed before their prefixes ('...' before '..' before '.')
	-- NOTE(review): assuming the base tokenizer honors list order for matching -- confirm
	for w in ([[... .. == ~= <= >= + - * / ^ < > = ( ) { } [ ] ; : , .]]):gmatch('%S+') do
		self.symbols:insert(w)
	end

	if version >= '5.1' then
		self.symbols:insert'#'	-- length operator (5.1+)
		self.symbols:insert'%'	-- modulo operator (5.1+)
	end

	for w in ([[and break do else elseif end false for function if in local nil not or repeat return then true until while]]):gmatch('%S+') do
		self.keywords[w] = true
	end

	-- TODO this will break because luajit doesn't care about versions
	-- if I use a load-test, the ext.load shim layer will break
	-- if I use a load('goto=true') test without ext.load then load() doesn't accept strings for 5.1 when the goto isn't a keyword, so I might as well just test if load can load any string ...
	-- TODO separate language features from versions and put all the language options in a ctor table somewhere
	do--if version >= '5.2' then
		self.symbols:insert'::' -- for labels .. make sure you insert it before :: -- NOTE(review): presumably means ordering vs ':' matters for longest-match; confirm
		self.keywords['goto'] = true
	end

	if version >= '5.3' then -- and not useluajit then ... setting this fixes some validation tests, but setting it breaks langfix+luajit ... TODO straighten out parser/version configuration
		self.symbols:insert'//'	-- floor division (5.3+)
		self.symbols:insert'~'	-- bitwise not / xor (5.3+)
		self.symbols:insert'&'
		self.symbols:insert'|'
		self.symbols:insert'<<'
		self.symbols:insert'>>'
	end
end
53 |
-- Tokenizer ctor: after base init, skip a leading shebang ('#...') line.
function LuaTokenizer:init(...)
	LuaTokenizer.super.init(self, ...)

	local r = self.r
	local firstChar = r.data:sub(1, 1)
	if firstChar == '#' then
		-- consume through the end of the line, or to end-of-data if no newline
		if not r:seekpast'\n' then
			r:seekpast'$'
		end
	end
end
65 |
-- Try to consume a block comment: '--' followed by a long-bracket open
-- (e.g. --[==[ ... ]==]).  Returns true when one was consumed.
function LuaTokenizer:parseBlockComment()
	local r = self.r
	local opened = r:canbe'%-%-%[=*%['
	if not opened then return end
	self:readRestOfBlock(r.lasttoken)
	return true
end
73 |
-- Strings: try Lua's long-bracket form first, then fall back to the base
-- tokenizer's quoted-string handling.
function LuaTokenizer:parseString()
	return self:parseBlockString()
		or LuaTokenizer.super.parseString(self)
end
81 |
82 | -- Lua-specific block strings
-- Lua long-bracket string: [[...]], [=[...]=], etc.
-- Yields the contents as a 'string' token and returns true on success.
function LuaTokenizer:parseBlockString()
	local r = self.r
	local opened = r:canbe'%[=*%['
	if not opened then return end
	if not self:readRestOfBlock(r.lasttoken) then return end
	--DEBUG(@5): print('read multi-line string ['..(r.index-#r.lasttoken)..','..r.index..']: '..r.lasttoken)
	coroutine.yield(r.lasttoken, 'string')
	return true
end
92 |
-- Consume the remainder of a long-bracket block (string or comment) whose
-- opening token (e.g. '[==[') has already been matched.
-- 'startToken' supplies the '=' count; the closing bracket must match it.
-- Leaves the block contents in r.lasttoken and returns it.
function LuaTokenizer:readRestOfBlock(startToken)
	local r = self.r

	-- extract the '=' run so we can match the corresponding closer
	local eq = assert(startToken:match('%[(=*)%[$'))
	-- skip whitespace?
	r:canbe'\n' -- if the first character is a newline then skip it
	local start = r.index
	if not r:seekpast('%]'..eq..'%]') then
		error{msg="expected closing block"}
	end
	-- since we used seekpast, the string isn't being captured as a lasttoken ...
	--return r:setlasttoken(r.data:sub(start, r.index - #r.lasttoken - 1))
	-- ... so don't push it into the history here, just assign it.
	r.lasttoken = r.data:sub(start, r.index - #r.lasttoken - 1)
	return r.lasttoken
end
109 |
110 |
-- Parse the hex-number payload following an already-consumed '0x' prefix and
-- yield it as a 'number' token (with the '0x' re-attached).
-- 5.2+ allows hex floats: an optional '.' plus a binary exponent marker
-- 'p'/'P' whose digits are written in decimal.  LuaJIT adds LL/ULL suffixes.
function LuaTokenizer:parseHexNumber(...)
	local r = self.r
	-- if version is 5.2 then allow decimals in hex #'s, and use 'p's instead of 'e's for exponents
	if self.version >= '5.2' then
		-- use mustbe like the pre-5.2 branch below: '0x' with no digits is a
		-- malformed number, not an internal error (previously canbe returned
		-- nil here and the following gsub crashed)
		local token = r:mustbe('[%.%da-fA-F]+', 'malformed number')
		local numdots = #token:gsub('[^%.]','')
		assert.le(numdots, 1, {msg='malformed number'})
		local n = table{'0x', token}
		-- the exponent marker is case-insensitive per the Lua grammar
		if r:canbe'[pP]' then
			n:insert(r.lasttoken)
			-- fun fact, while the hex float can include hex digits, its exponent
			-- must be in decimal.  the sign is optional ('0x1p4' is valid).
			n:insert(r:mustbe('[%+%-]?%d+', 'malformed number'))
		elseif numdots == 0 and self.useluajit then
			if r:canbe'LL' then
				n:insert'LL'
			elseif r:canbe'ULL' then
				n:insert'ULL'
			end
		end
		coroutine.yield(n:concat(), 'number')
	else
		--return LuaTokenizer.super.parseHexNumber(self, ...)
		local token = r:mustbe('[%da-fA-F]+', 'malformed number')
		local n = table{'0x', token}
		if self.useluajit then
			if r:canbe'LL' then
				n:insert'LL'
			elseif r:canbe'ULL' then
				n:insert'ULL'
			end
		end
		coroutine.yield(n:concat(), 'number')
	end
end
146 |
-- Parse a decimal number token: digits with at most one '.', an optional
-- decimal exponent ('e'/'E'), and LuaJIT's LL/ULL integer suffixes.
-- Yields the reassembled text as a 'number' token.
function LuaTokenizer:parseDecNumber()
	local r = self.r
	local token = r:canbe'[%.%d]+'
	local numdots = #token:gsub('[^%.]','')
	assert.le(numdots, 1, {msg='malformed number'})
	local n = table{token}
	-- the exponent marker is case-insensitive per the Lua grammar
	if r:canbe'[eE]' then
		n:insert(r.lasttoken)
		-- the exponent sign is optional ('1e5' is valid); previously the
		-- pattern demanded a '+'/'-' and rejected unsigned exponents
		n:insert(r:mustbe('[%+%-]?%d+', 'malformed number'))
	elseif numdots == 0 and self.useluajit then
		if r:canbe'LL' then
			n:insert'LL'
		elseif r:canbe'ULL' then
			n:insert'ULL'
		end
	end
	coroutine.yield(n:concat(), 'number')
end
165 |
166 | return LuaTokenizer
167 |
--------------------------------------------------------------------------------
/parser.lua:
--------------------------------------------------------------------------------
-- Legacy forwarding module: `require 'parser'` returns the Lua-language parser.
-- TODO get rid of this file and rename all `require 'parser'` to `require 'parser.lua.parser'` ... or maybe ...
-- ... maybe that's a bad idea, because it is more verbose ...
-- maybe instead of forwarding LuaParser, I should just write some wrapper functions here, like parser.parse(...) to auto-construct a LuaParser and return its tree ...
return require 'parser.lua.parser'
6 |
--------------------------------------------------------------------------------
/parser.rockspec:
--------------------------------------------------------------------------------
-- LuaRocks rockspec for the 'parser' rock (a Lua parser written in Lua).
package = "parser"
version = "dev-1"
source = {
	url = "git+https://github.com/thenumbernine/lua-parser"
}
description = {
	summary = "Lua Parser in Lua",
	detailed = "Lua Parser in Lua",
	homepage = "https://github.com/thenumbernine/lua-parser",
	license = "MIT"
}
-- NOTE(review): '~> 5.3' pins the interpreter to 5.3.x, yet the parser code
-- branches on versions '5.0' through '5.4' -- confirm the intended range.
dependencies = {
	"lua ~> 5.3"
}
build = {
	type = "builtin",
	modules = {
		["parser"] = "parser.lua",
		["parser.load_xform"] = "load_xform.lua",
		["parser.base.ast"] = "base/ast.lua",
		["parser.base.datareader"] = "base/datareader.lua",
		["parser.base.parser"] = "base/parser.lua",
		["parser.base.tokenizer"] = "base/tokenizer.lua",
		["parser.grammar.parser"] = "grammar/parser.lua",
		["parser.grammar.tokenizer"] = "grammar/tokenizer.lua",
		["parser.lua.ast"] = "lua/ast.lua",
		["parser.lua.parser"] = "lua/parser.lua",
		["parser.lua.tokenizer"] = "lua/tokenizer.lua",
	},
	copy_directories = {
		"tests"
	}
}
34 |
--------------------------------------------------------------------------------
/syntax_5.0.txt:
--------------------------------------------------------------------------------
1 | chunk ::= {stat [`;´]} ;
2 |
3 | block ::= chunk ;
4 |
5 | stat ::= varlist1 `=´ explist1
6 | | functioncall
7 | | do block end
8 | | while exp do block end
9 | | repeat block until exp
10 | | if exp then block {elseif exp then block} [else block] end
11 | | return [explist1]
12 | | break
13 | | for Name `=´ exp `,´ exp [`,´ exp] do block end
14 | | for Name {`,´ Name} in explist1 do block end
15 | | function funcname funcbody
16 | | local function Name funcbody
17 | | local namelist [init] ;
18 |
19 | funcname ::= Name {`.´ Name} [`:´ Name] ;
20 |
21 | varlist1 ::= var {`,´ var} ;
22 |
23 | var ::= Name
24 | | prefixexp `[´ exp `]´
25 | | prefixexp `.´ Name ;
26 |
27 | namelist ::= Name {`,´ Name} ;
28 |
29 | init ::= `=´ explist1 ;
30 |
31 | explist1 ::= {exp `,´} exp ;
32 |
33 | exp ::= nil
34 | | false
35 | | true
36 | | Number
37 | | Literal
38 | | function
39 | | prefixexp
40 | | tableconstructor
41 | | exp binop exp
42 | | unop exp ;
43 |
44 | prefixexp ::= var
45 | | functioncall
46 | | `(´ exp `)´ ;
47 |
48 | functioncall ::= prefixexp args
49 | | prefixexp `:´ Name args ;
50 |
51 | args ::= `(´ [explist1] `)´
52 | | tableconstructor
53 | | Literal ;
54 |
55 | function ::= function funcbody ;
56 |
57 | funcbody ::= `(´ [parlist] `)´ block end ;
58 |
59 | parlist ::= Name {`,´ Name} [`,´ `...´]
60 | | `...´ ;
61 |
62 | tableconstructor ::= `{´ [fieldlist] `}´ ;
63 |
64 | fieldlist ::= field {fieldsep field} [fieldsep] ;
65 |
66 | field ::= `[´ exp `]´ `=´ exp
67 | | name `=´ exp
68 | | exp ;
69 |
70 | fieldsep ::= `,´
71 | | `;´ ;
72 |
73 | binop ::= `+´
74 | | `-´
75 | | `*´
76 | | `/´
77 | | `^´
78 | | `..´
79 | | `<´
80 | | `<=´
81 | | `>´
82 | | `>=´
83 | | `==´
84 | | `~=´
85 | | and
86 | | or ;
87 |
88 | unop ::= `-´
89 | | not ;
90 |
--------------------------------------------------------------------------------
/syntax_5.1.txt:
--------------------------------------------------------------------------------
1 | chunk ::= {stat [';']} [laststat [';']] ;
2 |
3 | block ::= chunk ;
4 |
5 | stat ::= varlist '=' explist
6 | | functioncall
7 | | 'do' block 'end'
8 | | 'while' exp 'do' block 'end'
9 | | 'repeat' block 'until' exp
10 | | 'if' exp 'then' block {'elseif' exp 'then' block} ['else' block] 'end'
11 | | 'for' Name '=' exp ',' exp [',' exp] 'do' block 'end'
12 | | 'for' namelist 'in' explist 'do' block 'end'
13 | | 'function' funcname funcbody
14 | | 'local' 'function' Name funcbody
15 | | 'local' namelist ['=' explist]
16 | ;
17 |
18 | laststat ::= 'return' [explist]
19 | | 'break'
20 | ;
21 |
22 | funcname ::= Name {'.' Name} [':' Name] ;
23 |
24 | varlist ::= var {',' var} ;
25 |
26 | var ::= Name
27 | | prefixexp '[' exp ']'
28 | | prefixexp '.' Name
29 | ;
30 |
31 | namelist ::= Name {',' Name} ;
32 |
33 | explist ::= {exp ','} exp ;
34 |
35 | exp ::= 'nil'
36 | | 'false'
37 | | 'true'
38 | | Numeral
39 | | LiteralString
40 | | '...'
41 | | functiondef
42 | | prefixexp
43 | | tableconstructor
44 | | exp binop exp
45 | | unop exp
46 | ;
47 |
48 | prefixexp ::= var
49 | | functioncall
50 | | '(' exp ')'
51 | ;
52 |
53 | functioncall ::= prefixexp args
54 | | prefixexp ':' Name args
55 | ;
56 |
57 | args ::= '(' [explist] ')'
58 | | tableconstructor
59 | | LiteralString
60 | ;
61 |
62 | functiondef ::= 'function' funcbody ;
63 |
64 | funcbody ::= '(' [parlist] ')' block 'end' ;
65 |
66 | parlist ::= namelist [',' '...']
67 | | '...'
68 | ;
69 |
70 | tableconstructor ::= '{' [fieldlist] '}' ;
71 |
72 | fieldlist ::= field {fieldsep field} [fieldsep] ;
73 |
74 | field ::= '[' exp ']' '=' exp
75 | | Name '=' exp
76 | | exp
77 | ;
78 |
79 | fieldsep ::= ','
80 | | ';'
81 | ;
82 |
83 | binop ::= '+'
84 | | '-'
85 | | '*'
86 | | '/'
87 | | '^'
88 | | '%'
89 | | '..'
90 | | '<'
91 | | '<='
92 | | '>'
93 | | '>='
94 | | '=='
95 | | '~='
96 | | 'and'
97 | | 'or'
98 | ;
99 |
100 | unop ::= '-'
101 | | 'not'
102 | | '#'
103 | ;
104 |
105 | -- Name ::= ... how to define valid names ...
106 | -- Numeral ::= ... how to define numerals ...
107 | -- LiteralString ::= how to define literal strings ...
108 |
--------------------------------------------------------------------------------
/syntax_5.2.txt:
--------------------------------------------------------------------------------
1 | chunk ::= block ;
2 |
3 | block ::= {stat} [retstat] ;
4 |
5 | retstat ::= 'return' [explist] [';'] ;
6 |
7 | stat ::= ';'
8 | | varlist '=' explist
9 | | functioncall
10 | | label
11 | | 'break'
12 | | 'goto' Name
13 | | 'do' block 'end'
14 | | 'while' exp 'do' block 'end'
15 | | 'repeat' block 'until' exp
16 | | 'if' exp 'then' block {'elseif' exp 'then' block} ['else' block] 'end'
17 | | 'for' Name '=' exp ',' exp [',' exp] 'do' block 'end'
18 | | 'for' namelist 'in' explist 'do' block 'end'
19 | | 'function' funcname funcbody
20 | | 'local' 'function' Name funcbody
21 | | 'local' namelist ['=' explist]
22 | ;
23 |
24 | varlist ::= var {',' var} ;
25 |
26 | funcname ::= Name {'.' Name} [':' Name] ;
27 |
28 | label ::= '::' Name '::' ;
29 |
30 | var ::= Name
31 | | prefixexp '[' exp ']'
32 | | prefixexp '.' Name
33 | ;
34 |
35 | namelist ::= Name {',' Name} ;
36 |
37 | explist ::= exp {',' exp} ;
38 |
39 | exp ::= 'nil'
40 | | 'false'
41 | | 'true'
42 | | Numeral
43 | | LiteralString
44 | | '...'
45 | | functiondef
46 | | prefixexp
47 | | tableconstructor
48 | | exp binop exp
49 | | unop exp
50 | ;
51 |
52 | prefixexp ::= var
53 | | functioncall
54 | | '(' exp ')'
55 | ;
56 |
57 | functioncall ::= prefixexp args
58 | | prefixexp ':' Name args ;
59 |
60 | args ::= '(' [explist] ')'
61 | | tableconstructor
62 | | LiteralString
63 | ;
64 |
65 | functiondef ::= 'function' funcbody ;
66 |
67 | funcbody ::= '(' [parlist] ')' block 'end' ;
68 |
69 | parlist ::= namelist [',' '...']
70 | | '...'
71 | ;
72 |
73 | tableconstructor ::= '{' [fieldlist] '}' ;
74 |
75 | fieldlist ::= field {fieldsep field} [fieldsep] ;
76 |
77 | field ::= '[' exp ']' '=' exp
78 | | Name '=' exp
79 | | exp
80 | ;
81 |
82 | fieldsep ::= ',' | ';' ;
83 |
84 | binop ::= '+'
85 | | '-'
86 | | '*'
87 | | '/'
88 | | '^'
89 | | '%'
90 | | '..'
91 | | '<'
92 | | '<='
93 | | '>'
94 | | '>='
95 | | '=='
96 | | '~='
97 | | 'and'
98 | | 'or'
99 | ;
100 |
101 | unop ::= '-'
102 | | 'not'
103 | | '#'
104 | ;
105 |
--------------------------------------------------------------------------------
/syntax_5.3.txt:
--------------------------------------------------------------------------------
1 | chunk ::= block ;
2 |
3 | block ::= {stat} [retstat] ;
4 |
5 | stat ::= ';'
6 | | varlist '=' explist
7 | | functioncall
8 | | label
9 | | break
10 | | goto Name
11 | | do block end
12 | | while exp do block end
13 | | repeat block until exp
14 | | if exp then block {elseif exp then block} [else block] end
15 | | for Name '=' exp ',' exp [',' exp] do block end
16 | | for namelist in explist do block end
17 | | function funcname funcbody
18 | | local function Name funcbody
19 | | local namelist ['=' explist]
20 | ;
21 |
22 | retstat ::= return [explist] [';'] ;
23 |
24 | label ::= '::' Name '::' ;
25 |
26 | funcname ::= Name {'.' Name} [':' Name] ;
27 |
28 | varlist ::= var {',' var} ;
29 |
30 | var ::= Name
31 | | prefixexp '[' exp ']'
32 | | prefixexp '.' Name
33 | ;
34 |
35 | namelist ::= Name {',' Name} ;
36 |
37 | explist ::= exp {',' exp} ;
38 |
39 | exp ::= nil
40 | | false
41 | | true
42 | | Numeral
43 | | LiteralString
44 | | '...'
45 | | functiondef
46 | | prefixexp
47 | | tableconstructor
48 | | exp binop exp
49 | | unop exp
50 | ;
51 |
52 | prefixexp ::= var
53 | | functioncall
54 | | '(' exp ')'
55 | ;
56 |
57 | functioncall ::= prefixexp args
58 | | prefixexp ':' Name args
59 | ;
60 |
61 | args ::= '(' [explist] ')'
62 | | tableconstructor
63 | | LiteralString
64 | ;
65 |
66 | functiondef ::= function funcbody ;
67 |
68 | funcbody ::= '(' [parlist] ')' block end ;
69 |
70 | parlist ::= namelist [',' '...']
71 | | '...'
72 | ;
73 |
74 | tableconstructor ::= '{' [fieldlist] '}' ;
75 |
76 | fieldlist ::= field {fieldsep field} [fieldsep] ;
77 |
78 | field ::= '[' exp ']' '=' exp
79 | | Name '=' exp
80 | | exp
81 | ;
82 |
83 | fieldsep ::= ','
84 | | ';'
85 | ;
86 |
87 | binop ::= '+'
88 | | '-'
89 | | '*'
90 | | '/'
91 | | '//'
92 | | '^'
93 | | '%'
94 | | '&'
95 | | '~'
96 | | '|'
97 | | '>>'
98 | | '<<'
99 | | '..'
100 | | '<'
101 | | '<='
102 | | '>'
103 | | '>='
104 | | '=='
105 | | '~='
106 | | and
107 | | or
108 | ;
109 |
110 | unop ::= '-'
111 | | not
112 | | '#'
113 | | '~'
114 | ;
115 |
--------------------------------------------------------------------------------
/syntax_5.4.txt:
--------------------------------------------------------------------------------
1 | chunk ::= block
2 | ;
3 |
4 | block ::= {stat} [retstat] ;
5 |
6 | stat ::= ';'
7 | | varlist '=' explist
8 | | functioncall
9 | | label
10 | | break
11 | | goto Name
12 | | do block end
13 | | while exp do block end
14 | | repeat block until exp
15 | | if exp then block {elseif exp then block} [else block] end
16 | | for Name '=' exp ',' exp [',' exp] do block end
17 | | for namelist in explist do block end
18 | | function funcname funcbody
19 | | local function Name funcbody
20 | | local attnamelist ['=' explist]
21 | ;
22 |
23 | attnamelist ::= Name attrib {',' Name attrib} ;
24 |
25 | attrib ::= ['<' Name '>'] ;
26 |
27 | retstat ::= return [explist] [';'] ;
28 |
29 | label ::= '::' Name '::' ;
30 |
31 | funcname ::= Name {'.' Name} [':' Name] ;
32 |
33 | varlist ::= var {',' var} ;
34 |
35 | var ::= Name
36 | | prefixexp '[' exp ']'
37 | | prefixexp '.' Name
38 | ;
39 |
40 | namelist ::= Name {',' Name} ;
41 |
42 | explist ::= exp {',' exp} ;
43 |
44 | exp ::= nil
45 | | false
46 | | true
47 | | Numeral
48 | | LiteralString
49 | | '...'
50 | | functiondef
51 | | prefixexp
52 | | tableconstructor
53 | | exp binop exp
54 | | unop exp
55 | ;
56 |
57 | prefixexp ::= var
58 | | functioncall
59 | | '(' exp ')'
60 | ;
61 |
62 | functioncall ::= prefixexp args
63 | | prefixexp ':' Name args
64 | ;
65 |
66 | args ::= '(' [explist] ')'
67 | | tableconstructor
68 | | LiteralString
69 | ;
70 |
71 | functiondef ::= function funcbody ;
72 |
73 | funcbody ::= '(' [parlist] ')' block end ;
74 |
75 | parlist ::= namelist [',' '...']
76 | | '...'
77 | ;
78 |
79 | tableconstructor ::= '{' [fieldlist] '}' ;
80 |
81 | fieldlist ::= field {fieldsep field} [fieldsep] ;
82 |
83 | field ::= '[' exp ']' '=' exp
84 | | Name '=' exp
85 | | exp
86 | ;
87 |
88 | fieldsep ::= ','
89 | | ';'
90 | ;
91 |
92 | binop ::= '+'
93 | | '-'
94 | | '*'
95 | | '/'
96 | | '//'
97 | | '^'
98 | | '%'
99 | | '&'
100 | | '~'
101 | | '|'
102 | | '>>'
103 | | '<<'
104 | | '..'
105 | | '<'
106 | | '<='
107 | | '>'
108 | | '>='
109 | | '=='
110 | | '~='
111 | | and
112 | | or
113 | ;
114 |
115 | unop ::= '-'
116 | | not
117 | | '#'
118 | | '~'
119 | ;
120 |
--------------------------------------------------------------------------------
/syntax_ast_5.1.txt:
--------------------------------------------------------------------------------
1 | -- TODO declare a parent-node 'op' somehow
2 | -- one downside to this system is .. you need one rule per unique ast node ...
3 | -- TODO all these should inherit from 'op'
4 | -- another TODO ...
5 | -- ... all these were originally implemented as `if token found then build the node`
6 | -- but for the auto generation, I think I have to build the node, then bail if something doesn't match ...
7 | -- ... and that means no more 'mustbe' ? since instead it'll just be returning nil?
8 | -- or should I keep the old design? but that means changing the code-generation ...
9 | -- hmm but that means pushing the first matched token of each rule back into the calling rule as a ...
10 | -- `if canbe(symbol) then parse_nextrule()`
11 | -- that might mean I need to assert every rule only has "or" on its topmost, and then next, every expression starts with a keyword/symbol
12 | --
13 | -- or how about I just generate a FSM? that seems to be trendy.
14 |
15 | -- are the captures just the rules themselves?
16 | -- should I just capture everything?
17 | -- should I tag everything with what rule created it?
18 |
19 | block ::= {(stat) [';']} [(laststat) [';']] ;
20 |
21 | stat ::= 'local' 'function' (Name) (funcbody)
22 | | 'local' (namelist) ['=' (explist)]
23 | | 'function' (funcname) (funcbody)
24 | | 'for' (Name) '=' (exp) ',' (exp) [',' (exp)] 'do' (block) 'end'
25 | | 'for' (namelist) 'in' (explist) 'do' (block) 'end'
26 | | 'if' (exp) 'then' (block) {'elseif' (exp) 'then' (block)} ['else' (block)] 'end'
27 | | 'repeat' (block) 'until' (exp)
28 | | 'while' (exp) 'do' (block) 'end'
29 | | 'do' (block) 'end'
30 | | (functioncall)
31 | | (varlist) '=' (explist)
32 | ;
33 |
34 | laststat ::= 'return' [explist]
35 | | 'break'
36 | ;
37 |
38 | funcname ::= Name {'.' Name} [':' Name] ;
39 |
40 | varlist ::= var {',' var} ;
41 |
42 | var ::= Name
43 | | prefixexp '[' exp ']'
44 | | prefixexp '.' Name
45 | ;
46 |
47 | namelist ::= Name {',' Name} ;
48 |
49 | explist ::= {exp ','} exp ;
50 |
51 | exp ::= 'nil'
52 | | 'false'
53 | | 'true'
54 | | Numeral
55 | | LiteralString
56 | | '...'
57 | | functiondef
58 | | prefixexp
59 | | tableconstructor
60 | | exp binop exp
61 | | unop exp
62 | ;
63 |
64 | prefixexp ::= var
65 | | functioncall
66 | | '(' exp ')'
67 | ;
68 |
69 | functioncall ::= prefixexp args
70 | | prefixexp ':' Name args
71 | ;
72 |
73 | args ::= '(' [explist] ')'
74 | | tableconstructor
75 | | LiteralString
76 | ;
77 |
78 | functiondef ::= 'function' funcbody ;
79 |
80 | funcbody ::= '(' [parlist] ')' block 'end' ;
81 |
82 | parlist ::= namelist [',' '...']
83 | | '...'
84 | ;
85 |
86 | tableconstructor ::= '{' [fieldlist] '}' ;
87 |
88 | fieldlist ::= field {fieldsep field} [fieldsep] ;
89 |
90 | field ::= '[' exp ']' '=' exp
91 | | Name '=' exp
92 | | exp
93 | ;
94 |
95 | fieldsep ::= ','
96 | | ';'
97 | ;
98 |
99 | binop ::= add
100 | | sub
101 | | mul
102 | | div
103 | | pow
104 | | mod
105 | | concat
106 | | lt
107 | | le
108 | | gt
109 | | ge
110 | | eq
111 | | ne
112 | | and
113 | | or
114 | ;
115 |
116 | add ::= '+' ;
117 | sub ::= '-' ;
118 | mul ::= '*' ;
119 | div ::= '/' ;
120 | pow ::= '^' ;
121 | mod ::= '%' ;
122 | concat ::= '..' ;
123 | lt ::= '<' ;
124 | le ::= '<=' ;
125 | gt ::= '>' ;
126 | ge ::= '>=' ;
127 | eq ::= '==' ;
128 | ne ::= '~=' ;
129 | and ::= 'and' ;
130 | or ::= 'or' ;
131 |
132 | unop ::= unm
133 | | not
134 | | len
135 | ;
136 |
137 | unm ::= '-' ;
138 | not ::= 'not' ;
139 | len ::= '#' ;
140 |
141 | -- Name ::= ... how to define valid names ...
142 | -- Numeral ::= ... how to define numerals ...
143 | -- LiteralString ::= ... how to define literal strings ...
144 |
--------------------------------------------------------------------------------
/syntax_grammar.txt:
--------------------------------------------------------------------------------
1 | ... what other projects are using this parser anyways:
2 |
3 | ./netrefl/netfield_vec.lua:local ast = require 'parser.lua.ast'
4 | ./lua-to-batch/lua_to_batch.lua:local ast = require 'parser.lua.ast'
5 | ./vec/create.lua:local ast = require 'parser.lua.ast'
6 | ./local-default/local-default.lua
7 |
8 | ./sand-attack/verify-demo.lua:local parser = require 'parser'
9 | ./dumpworld-from-2020/convert-mario-maps.lua:local parser = require 'parser'
10 | ./lua-to-batch/lua_to_batch.lua:local parser = require 'parser'
11 | ./zeta2d/convert-mario-maps.lua:local parser = require 'parser'
12 |
13 |
14 |
15 | simplest case for a grammar of grammars:
16 |
17 | rules ::= rule { ';' rule }
18 | rule ::= name '::=' expr_or ;
19 | expr_or ::= expr_list {'|' expr_list} ;
20 | expr_list ::=
21 | '{' expr_or '}'
22 | | '[' expr_or ']'
23 | | Name
24 | | Numeral
25 | | LiteralString
26 | ;
27 |
28 | ... how to also include named-captures into the grammar?
29 |
30 | rules ::= rule { ';' rule }
31 | rule ::= name=name '::=' expr=expr_or ;
32 | expr_or ::= exprs=(expr_list {'|' expr_list});
33 | expr_list ::=
34 | type=multiple '{' expr_or '}'
35 | | type=optional '[' expr_or ']'
36 | | Name
37 | | Numeral
38 | | LiteralString
39 | ;
40 |
41 | ... which would then make the grammar more complex:
42 | Using the new rules:
43 | field=token to capture and assign a single token to field 'field'
44 | field=(token tokens...) to capture and assign multiple tokens
45 | type=whatever to specify that, for this particular '|' branch, the AST node type
46 | ... maybe instead of type=, use some other syntax, to not collide with the field= syntax
47 | ... and maybe somehow syntax to distinguish when we want to capture tokens
48 | like maybe a * means "don't capture token"
49 | or maybe simply no field= means no capture
50 | though field=( ... ) means capture a list, and from there we might want to specify what in the list we don't want to capture
51 |
52 |
53 | What if I modeled the grammar grammar after the parser I already wrote, instead of after the grammars I wrote it after?
54 | How about `*` suffix means "don't trap as a distinct AST, forward back one level instead"
55 | And `name=` means "assign this to a named field" ?
56 | or should I even use named fields?
57 | More flexible for tree traversal if I don't ...
58 | ... and then per-class I could have member functions that return named versions of different fields,
59 | or even __index alias's?
60 |
61 | -- type=block, {stat} [return] will be assigned to self[i] as per default behavior
62 | block ::= { stat } [return] ;
63 |
64 | -- type=return,
65 | -- currently self.exprs[i] = unpack(explist)
66 | -- but maybe I should change from self.exprs[i] to self[i] ?
67 | return ::= 'return' [explist] ;
68 |
69 | -- * after rule name means forward/unpack: don't build a 'stat' node, just forward it back into block.
70 | -- ...or should only the rule-references have *'s for unpacking?
71 | -- But doing so with named fields is ambiguous ... more of an argument to get rid of all named fields.
72 | -- Should the * go on the rule or on the reference-to-rule?
73 | stat* ::=
74 | 'local' local
75 | | 'function' functionstmt
76 | ;
77 |
78 | -- type=local
79 | local ::= 'function' localfunction
80 | | localassign
81 | ;
82 |
83 | -- type=function
84 | -- in my current implementation, but maybe it's a bad idea to depend on 2 levels of AST to determine a local function vs a global function ?
85 | localfunction ::= Name funcbody ;
86 |
87 | -- type=assign
88 | -- but in my implementation I use 'assign' in a lot of places, and for this particular it is a local(assign(...))
89 | localassign ::= namelist ['=' explist] ;
90 |
91 | -- forward, rename type to 'function' (tho we're gonna see 'function' elsewhere) ...
92 | functionstmt* ::= funcname funcbody ;
93 |
94 | -- forward ...
95 | funcname* ::= Name {'.' Name} [':' Name] ;
96 |
97 | -- :funcbody() in my code returns a table
98 | -- whose first argument is the 'parlist' rule locally named 'args'
99 | -- and whose arguments 2...n are the statements in 'block'
100 | funcbody ::= '(' [parlist] ')' block 'end' ;
101 |
102 | -- parlist returns a table of type=var wrapping the arg name, or type=vararg
103 | parlist ::= namelist [',' '...'] | '...' ;
104 |
105 |
106 | TODO
107 | - how does specifying rule class hierarchy work? things like how 'true' and 'false' literal ast node are subclasses of 'boolean' ast node
108 | - also fix args, use indexes whenever possible, use 1:1 with AST grammar whenver possible, justify flattening whenever possible, use aliases
109 | - merge :serialize() and :traverse()
110 | - do something about implicit keywords/symbols that are read but not saved (save them? keep track of where they are?)
111 | - auto grammar -> ast node class generation
112 | - auto grammar -> parser code generation
113 | - move all this stuff into base/ast.lua
114 |
115 |
116 | -- symbols/keywords aren't captured, so ';' isn't captured
117 | -- so all the `rule` objs get put into an `ast._rule`, integer-indexed
118 | rules ::= rule { ';' rule } ;
119 | -- mind you regenerating code with optional elements means deciding where to re-insert them
120 | -- so regenerating the code means either save all the tokens, or it means ... idk what other options ...
121 | rules ::= rule { ';'* rule } ;
122 | -- maybe I should denote them optional with a * suffix or something, and then keep two lists: one of read tokens (for regeneration) and another of indexed tokens via labels or () for capturing or something
123 | rules ::= rules+=rule { ';' rules+=rule } ;
124 |
125 | -- `name=name` means to alias the first capture as 'name'
126 | -- symbols/keywords aren't captured, so '::=' isn't captured
127 | rule ::= name=name '::='* expr_or ;
128 |
129 | expr_or ::= expr_list {'|' expr_list} ;
130 | expr_list ::=
131 | '(' expr_or ')' -- parenthesis mean capture as a separate subtable (otherwise all captured expressions go into [i])
132 | | '{' expr_or '}' -- means multiple
133 | | '[' expr_or ']' -- means optional
134 | | Name
135 | | Numeral
136 | | LiteralString
137 | ;
138 |
139 |
140 |
--------------------------------------------------------------------------------
/tests/flatten.lua:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env lua
2 |
3 | local tolua = require 'ext.tolua'
4 | local parser = require 'parser'
5 |
6 | local gcode = [[function g() return print'hi' end]]
7 | local fcode = [[return function() g() end]]
8 | local code = gcode..'\n'..fcode
9 |
10 | print('original code')
11 | print(code)
12 | print()
13 |
14 | local ftree = parser.parse(fcode)
15 | print('f code')
16 | print(tolua(ftree))
17 | print('f ast code (should match original code)')
18 | print(ftree:toLua())
19 | print()
20 |
21 | local gtree = parser.parse(gcode)
22 | print('g code')
23 | print(tolua(gtree))
24 | print('g ast code')
25 | print(gtree:toLua())
26 | print()
27 |
28 | local fflat = ftree:flatten{
29 | g = table.unpack(gtree), -- TODO gtree:find'g' to look for global-level definitions?
30 | }
31 | print('flattened f ast')
32 | print(tolua(fflat))
33 | print('flattened f code')
34 | print(fflat:toLua())
35 |
--------------------------------------------------------------------------------
/tests/lua_to_c.lua:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env lua
2 | local parser = require 'parser'
3 | local ast = require 'parser.lua.ast'
4 | local path = require 'ext.path'
5 | local assert = require 'ext.assert'
6 | local table = require 'ext.table'
7 |
8 | local requires = table()
9 | local cobjtype = 'Object'
10 |
11 | local cppReservedWord = {
12 | 	['class'] = true,	-- must be a name->true set: ast._var:toC_recursive indexes by name; the old array form {'class'} made the lookup always nil
13 | }
14 |
15 | local tabs = -1 -- because everything is in one block
16 | function tab()
17 | return ('\t'):rep(tabs)
18 | end
19 | function tabblock(t, consume)
20 | tabs = tabs + 1
21 | for i,ti in ipairs(t) do
22 | consume(tab())
23 | consume(ti)
24 | if i < #t then consume'\n' end
25 | end
26 | tabs = tabs - 1
27 | end
28 |
29 | for k,cl in pairs(ast) do	-- install default toC()/toC_recursive() on every AST node class
30 | 	if ast.node:isa(cl) then
31 | 		function cl:toC()	-- serialize this node to a C string by accumulating consume() output
32 | 			local s = ''
33 | 			local consume
34 | 			consume = function(x)
35 | 				if type(x) == 'number' then
36 | 					x = tostring(x)	-- numbers are stringified, then fall through to the string branch
37 | 				end
38 | 				if type(x) == 'string' then
39 | 					s = s .. x
40 | 				elseif type(x) == 'table' then
41 | 					assert.is(x, ast.node)	-- only AST nodes may be consumed as tables
42 | 					assert.index(x, 'toC_recursive')
43 | 					x:toC_recursive(consume)	-- recurse: child node emits into the same accumulator
44 | 				else
45 | 					error('here with unknown type '..type(x))
46 | 				end
47 | 			end
48 | 			self:toC_recursive(consume)
49 | 			return s
50 | 		end
51 | 		-- weakness to this design ...i need to always keep specifying the above toC() wrapper, or I have to make a separate member function...
52 | 		function cl:toC_recursive(consume)
53 | 			self:serialize(consume)	-- default: fall back to the node's Lua serialization
54 | 		end
55 | 	end
56 | end
57 |
58 |
59 | -- make lua output the default for nodes' c output
60 | for _,info in ipairs{
61 | {'concat','+'},
62 | {'and','&&'},
63 | {'or','||'},
64 | {'ne','!='},
65 | } do
66 | local name, op = table.unpack(info)
67 | -- hmm, can I override serialize but only for specific consume()'s ?
68 | -- I guess if I want to test consume == my new custom one vs otherwise call super ...
69 | ast['_'..name].toC_recursive = function(self, consume)
70 | for i,x in ipairs(self) do
71 | consume(x)
72 | if i < #self then
73 | consume' '
74 | consume(op)
75 | consume ' '
76 | end
77 | end
78 | end
79 | end
80 | function ast._not:toC_recursive(consume)
81 | consume'!'
82 | consume(self[1])
83 | end
84 | function ast._len:toC_recursive(consume)
85 | consume(self[1])
86 | consume'.size()'
87 | end
88 | function ast._assign:toC_recursive(consume)
89 | for i=1,#self.vars do
90 | if self.exprs[i] then
91 | consume(self.vars[i])
92 | consume' = '
93 | consume(self.exprs[i])
94 | else
95 | consume(self.vars[i])
96 | end
97 | if i < #self.vars then consume', ' end
98 | end
99 | end
100 | function ast._block:toC_recursive(consume)
101 | tabblock(self, consume)
102 | end
103 | function ast._call:toC_recursive(consume)
104 | consume(self.func)
105 | consume'('
106 | for i,x in ipairs(self.args) do
107 | consume(x)
108 | if i < #self.args then consume', ' end
109 | end
110 | consume')'
111 | if self.func.name == 'require' then
112 | if self.args[1].type == 'string' then
113 | -- ok here we add the require file based on our lua path
114 | -- does this mean we need to declare the lua path up front to lua_to_c?
115 | requires:insert(self.args[1].value)
116 | else
117 | consume'\n#error require arg not a string'
118 | end
119 | end
120 | end
121 | function ast._foreq:toC_recursive(consume)
122 | consume'for ('
123 | consume(cobjtype)
124 | consume' '
125 | consume(self.var)
126 | consume' = '
127 | consume(self.min)
128 | consume'; '
129 | consume(self.var)
130 | consume' < '
131 | consume(self.max)
132 | consume'; '
133 | if self.step then
134 | consume(self.var)
135 | consume' += '
136 | consume(self.step)
137 | else
138 | consume'++'
139 | consume(self.var)
140 | end
141 | consume') {\n'
142 | tabblock(self, consume)
143 | consume(tab())
144 | consume'}'
145 | end
146 | function ast._forin:toC_recursive(consume)
147 | consume'for ('
148 | for i,v in ipairs(self.vars) do
149 | consume(v)
150 | if i < #self.vars then consume', ' end
151 | end
152 | consume' in '
153 | for i,v in ipairs(self.iterexprs) do
154 | consume(v)
155 | if i < #self.iterexprs then consume', ' end
156 | end
157 | consume') {\n'
158 | tabblock(self, consume)
159 | consume(tab())
160 | consume'}'
161 | end
162 | function ast._function:toC_recursive(consume)
163 | 	if self.name then
164 | 		-- global-scope def?
165 | 		--return cobjtype..' '..self.name..'('..table(self.args):mapi(function(arg) return cobjtype..' '..apply(arg) end):concat', '..') {\n' .. tabblock(self, apply) .. tab() .. '}'
166 | 		-- local-scope named function def ...
167 | 		consume(cobjtype)
168 | 		consume' '
169 | 		consume(self.name)
170 | 		consume' = []('
171 | 		for i,arg in ipairs(self.args) do
172 | 			consume(cobjtype)
173 | 			consume' '
174 | 			consume(arg)
175 | 			if i < #self.args then consume', ' end
176 | 		end
177 | 		consume') {\n'
178 | 		tabblock(self, consume)
179 | 		consume(tab())
180 | 		consume'}'
181 | 	else
182 | 		-- lambdas?
183 | 		consume'[]('
184 | 		for i,arg in ipairs(self.args) do
185 | 			consume(cobjtype)
186 | 			consume' '
187 | 			consume(arg)
188 | 			if i < #self.args then consume', ' end
189 | 		end
190 | 		consume') {\n'
191 | 		tabblock(self, consume)
192 | 		consume(tab())	-- BUGFIX: was `consuem(tab())` — a call to an undefined global, crashing whenever an anonymous function was emitted
193 | 		consume'}'
194 | 	end
195 | end
196 | function ast._if:toC_recursive(consume)
197 | consume'if ('
198 | consume(self.cond)
199 | consume') {\n'
200 | tabblock(self, consume)
201 | consume(tab()..'}')
202 | for _,ei in ipairs(self.elseifs) do
203 | consume(ei)
204 | end
205 | if self.elsestmt then consume(self.elsestmt) end
206 | end
207 | function ast._elseif:toC_recursive(consume)
208 | consume' else if ('
209 | consume(self.cond)
210 | consume') {\n'
211 | tabblock(self, consume)
212 | consume(tab())
213 | consume'}'
214 | end
215 | function ast._else:toC_recursive(consume)
216 | consume' else {\n'
217 | tabblock(self, consume)
218 | consume(tab())
219 | consume'}'
220 | end
221 | function ast._index:toC_recursive(consume)
222 | consume(self.expr)
223 | consume'['
224 | consume(self.key)
225 | consume']'
226 | end
227 | function ast._indexself:toC_recursive(consume)
228 | consume(self.expr)
229 | consume'.'
230 | consume(self.key)
231 | end
232 | function ast._local:toC_recursive(consume)
233 | if self.exprs[1].type == 'function' or self.exprs[1].type == 'assign' then
234 | -- if exprs[1] is a multi-assign then an 'cobjtype' needs to prefix each new declaration
235 | consume(cobjtype)
236 | consume' '
237 | consume(self.exprs[1])
238 | else
239 | for i=1,#self.exprs do
240 | consume(cobjtype)
241 | consume' '
242 | consume(self.exprs[i])
243 | if i < #self.exprs then consume'\n' end
244 | end
245 | end
246 | end
247 | function ast._vararg:toC_recursive(consume)
248 | consume'reserved_vararg' -- reserved name?
249 | end
250 | function ast._var:toC_recursive(consume)
251 | if cppReservedWord[self.name] then
252 | consume('cppreserved_' .. self.name)
253 | else
254 | consume(self.name)
255 | end
256 | end
257 |
258 |
259 | local function addtab(s)
260 | return '\t'..(s:gsub('\n', '\n\t')) -- tab
261 | end
262 |
263 | -- also populates requires()
264 | local function luaFileToCpp(fn)
265 | 	assert(fn, "expected filename")
266 | 	assert(path(fn):exists(), "failed to find "..tostring(fn))	-- existence check only; was mistakenly bound to a `local luacode` that got shadowed immediately
267 | 	local luacode = assert(path(fn):read(), "failed to find "..tostring(fn))
268 | 	local tree = parser.parse(luacode)
269 | 	local cppcode = tree:toC()
270 | 	cppcode = '//file: '..fn..'\n'..cppcode	-- prepend source-file banner comment
271 | 	cppcode = addtab(cppcode)
272 | 	return cppcode
273 | end
274 |
275 |
276 |
277 | print[[
278 |
279 | #include "CxxAsLua/Object.h"
280 | using namespace CxxAsLua;
281 |
282 | // how to handle _G ...
283 | // esp wrt locals ...
284 | // if we use _G then that incurs overhead ...
285 | Object _G;
286 |
287 | // for global calls ...
288 | Object error;
289 | Object type;
290 | Object require;
291 | Object table;
292 |
293 | int main(int argc, char** argv) {
294 | _G = Object::Map();
295 | _G["package"] = Object::Map();
296 | _G["package"]["loaded"] = Object::Map();
297 |
298 | error = _G["error"] = [](Object x) -> Object {
299 | throw std::runtime_error((std::string)x);
300 | };
301 |
302 | //hmm, 'type' might be used as a global later, so i might have to remove the 'using namespace' and instead replace all Object's with Object::Object's
303 | ::type = _G["type"] = [](Object x) -> Object {
304 | if (x.is_nil()) {
305 | return "nil";
306 | } else if (x.is_string()) {
307 | return "string";
308 | } else if (x.is_table()) {
309 | return "table";
310 | } else if (x.is_boolean()) {
311 | return "boolean";
312 | } else if (x.is_function()) {
313 | return "function";
314 | } else if (x.is_nil()) {
315 | return "nil";
316 | }
317 | //or use getTypeIndex()
318 | // or better yet, rewrite our x.details to be a std::variant,
319 | // and map the variant index to a type,
320 | // then just store type info in that extra arra
321 | };
322 |
323 | table = _G["table"] = Object::Map();
324 |
325 | table["concat"] = [](VarArg arg) -> Object {
326 | if (!arg[1].is_table()) error("expected a table");
327 | //TODO FINISHME
328 | // list, sep, i
329 | std::ostringstream s;
330 | std::string sep = "";
331 | for (const Object& o : arg.objects) {
332 | std::cout << sep;
333 | std::cout << o;
334 | sep = "\t";
335 | }
336 | std::cout << std::endl;
337 | };
338 |
339 | require = _G["require"] = [&](std::string const & s) -> Object {
340 | Object x = _G["package"]["loaded"][s];
341 | if (x != nil) return x;
342 |
343 | x = _G["cppmodules"][s];
344 | if (x != nil) {
345 | x = x();
346 | _G["package"]["loaded"][s] = x;
347 | return x;
348 | }
349 |
350 | return error(Object("idk how to load ") + s);
351 | };
352 |
353 | _G["cppmodules"] = Object::Map();
354 | ]]
355 |
356 | local cppcode = luaFileToCpp(... or 'lua_to_c_test.lua')
357 |
358 | for _,req in ipairs(requires) do
359 | -- ok here's where lua_to_c has to assume the same LUA_PATH as the c++ runtime
360 | print('//require: '..req)
361 | local fn = package.searchpath(req, package.path)
362 | if not fn then
363 | print("// package.searchpath couldn't find file")
364 | else
365 | print([[
366 | _G["cppmodules"]["]]..req..[["] = []() -> Object {
367 | ]])
368 | print(addtab(luaFileToCpp(fn)))
369 |
370 | print[[
371 | };
372 | ]]
373 | end
374 | end
375 |
376 | print(cppcode)
377 |
378 | print[[
379 | }
380 | ]]
381 |
--------------------------------------------------------------------------------
/tests/lua_to_c_test.lua:
--------------------------------------------------------------------------------
1 | print'hello'
2 |
--------------------------------------------------------------------------------
/tests/parse.lua:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env lua
2 | local path = require 'ext.path'
3 | local parser = require 'parser'
4 | local tree = assert(parser.parse(path(assert(..., "expected filename")):read()))
5 | print(tree:toLua())
6 |
--------------------------------------------------------------------------------
/tests/parsemyself.lua:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env lua
2 | local path = require 'ext.path'
3 | local os = require 'ext.os'
4 | local LuaParser = require 'parser'
5 |
6 | -- TODO would be nice to remember who is executing you ... lua vs luajit vs whatever ...
7 | local lua = 'lua'
8 |
9 | local inceptionLevel = ... or 1
10 | inceptionLevel = assert(tonumber(inceptionLevel), "expected number")
11 | if inceptionLevel > 5 then
12 | print('nobody can survive beyond 5 inception levels')
13 | return
14 | end
15 |
16 | local dstpath = path'inception'
17 | dstpath = dstpath:abs()
18 | dstpath:mkdir()
19 |
20 | -- now parse and output a new Lua path in the dst folder ...
21 | local function rewrite(src, dst)
22 | print(src..' => '..dst)
23 | dst:getdir():mkdir(true)
24 | assert(dst:write(LuaParser.parse((assert(src:read()))):toLua()))
25 | end
26 |
27 | -- find all lua files? search the rockspec?
28 | local srcpath = path'../..'
29 | for _,info in ipairs{
30 | -- [[ if you want to parse *everything* and not just the parser tree
31 | {dir='ext', files={'assert.lua', 'class.lua', 'cmdline.lua', 'coroutine.lua', 'ctypes.lua', 'debug.lua', 'detect_ffi.lua', 'detect_lfs.lua', 'detect_os.lua', 'env.lua', 'ext.lua', 'fromlua.lua', 'gcmem.lua', 'io.lua', 'load.lua', 'math.lua', 'meta.lua', 'number.lua', 'op.lua', 'os.lua', 'path.lua', 'range.lua', 'reload.lua', 'require.lua', 'string.lua', 'table.lua', 'timer.lua', 'tolua.lua', 'xpcall.lua'}},
32 | --]]
33 | {dir='parser', files={'parser.lua', 'load_xform.lua'}},
34 | {dir='parser/base', files={'ast.lua', 'datareader.lua', 'parser.lua', 'tokenizer.lua'}},
35 | {dir='parser/lua', files={'ast.lua', 'parser.lua', 'tokenizer.lua'}},
36 | {dir='parser/grammar', files={'parser.lua', 'tokenizer.lua'}},
37 | {dir='parser/tests', files={'flatten.lua', 'lua_to_c.lua', 'lua_to_c_test.lua', 'validate.lua', 'parse.lua', 'parsemyself.lua', 'spantest.lua'}},
38 | } do
39 | for _,fn in ipairs(info.files) do
40 | rewrite(srcpath/info.dir/fn, dstpath/info.dir/fn)
41 | end
42 | end
43 |
44 | -- then chdir and run it again
45 | dstpath'parser/tests':cd()
46 | os.exec(
47 | -- [[ if you want to only use reparsed content for the second parse ...
48 | 'LUA_PATH="'..dstpath..'/?.lua;'..dstpath..'/?/?.lua" && '..
49 | --]]
50 | lua..' parsemyself.lua '..(inceptionLevel+1))
51 |
--------------------------------------------------------------------------------
/tests/spantest.lua:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env lua
2 | require 'ext'
3 | local LuaParser = require 'parser.lua.parser'
4 |
5 | --[=[
6 | local code = [[
7 | local result = aa and bb
8 | x = 1
9 | y = 2
10 | z = x + y
11 | function h()
12 | print'hello world'
13 | return 42
14 | end
15 | ]]
16 | --]=]
17 | -- [=[
18 | local code = [[
19 | function f() end
20 | function g() end
21 | function h() end
22 | ]]
23 | --]=]
24 | --[[
25 | local code = path'../lua/parser.lua':read()
26 | --]]
27 | local parser = LuaParser(code, code)
28 |
29 | local tree = parser.tree
30 | local datareader = parser.t.r
31 |
32 | -- TODO this but for every test in minify_tests.txt
33 | -- then verify the :lua() serialized results match the source results
34 | local function printspan(x, tab)	-- recursively dump node `x`: its re-serialization, source-span substring, token-span substring, and all child fields; `tab` is the indent prefix
35 | tab = tab or ''	-- default to no indent at the root
36 | if x.type then	-- entries with a .type field get span diagnostics; other tables fall through to the generic field dump below
37 | local reconstructed = x:toLua()	-- re-serialize this node back to Lua source
38 | print(tab..'tostring():', string.trim(reconstructed))
39 | local fromIndexSpan = code:sub(x.span.from.index, x.span.to.index)	-- raw source text covered by this node's character span
40 | print(tab..'span substr:', tolua(fromIndexSpan))
41 | local fromTokenSpan = datareader.tokenhistory:sub(x.span.from.tokenIndex, x.span.to.tokenIndex):concat()	-- text rebuilt from the tokens covered by the node's token span
42 | print(tab..'token range: '..x.span.from.tokenIndex..', '..x.span.to.tokenIndex)
43 | print(tab..'token substr:', tolua(fromTokenSpan))
44 | print(tab..'type:', x.type)
45 |
46 | --[[
47 | local reconstructedCode = load(reconstructed):dump()
48 | local fromIndexSpanCode = load(fromIndexSpan):dump()
49 | local fromTokenSpanCode = load(fromTokenSpan):dump()
50 | assert.eq(reconstructedCode:hexdump(), fromIndexSpanCode:hexdump())
51 | assert.eq(reconstructedCode:hexdump(), fromTokenSpanCode:hexdump())
52 | --]]
53 | --[[
54 | local function reduceString(s)
55 | -- remove comments too, those will be in tokenSpan text
56 | s = s:gsub('%-%-[^\n]*', '')
57 | repeat
58 | local start1, start2 = s:find('%-%-%[=*%[')
59 | if not start1 then break end
60 | local eq = s:sub(start1+3, start2-1)
61 | assert(eq:match'^=*$')
62 | local finish1, finish2 = s:find('%]'..eq..'%]', start2)
63 | if not finish1 then break end
64 | s = s:sub(1, start1-1)..s:sub(finish2+1)
65 | until false
66 | s = s:gsub('%s+', ''):gsub('["\']', "'")
67 | return s
68 | end
69 | reconstructed = reduceString(reconstructed)
70 | fromIndexSpan = reduceString(fromIndexSpan)
71 | fromTokenSpan = reduceString(fromTokenSpan)
72 | assert.eq(reconstructed, fromIndexSpan)
73 | assert.eq(reconstructed, fromTokenSpan)
74 | --]]
75 | end
76 | for k,v in pairs(x) do	-- generic dump of every field; recurse into table values
77 | if k == 'span' then	-- print spans compactly instead of recursing
78 | print(tab..k..' = index range '..tostring(v.from.index)..'..'..tostring(v.to.index)
79 | ..', line/col range '..v.from.line..'/'..v.from.col..'..'..v.to.line..'/'..v.to.col)
80 | elseif k ~= 'parent'	-- skip back-references to avoid infinite recursion
81 | and k ~= 'span'	-- NOTE(review): redundant -- k == 'span' was already consumed by the first branch
82 | and k ~= 'parser'	-- skip the owning-parser back-reference too
83 | then
84 | if type(v) == 'table' then
85 | print(tab..k)
86 | printspan(v, tab..' ')	-- recurse one indent level deeper
87 | else
88 | print(tab..k..' = '..(v.toLua and v:toLua() or tostring(v)))--tolua(v))	-- NOTE(review): v is a non-table here; indexing v.toLua errors on numbers/booleans in stock Lua -- confirm ext installs value metatables or that only strings reach this branch
89 | end
90 | end
91 | end
92 | end
93 |
94 | printspan(tree)	-- dump the whole parse tree from the root
95 |
--------------------------------------------------------------------------------
/tests/strings.lua:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env luajit
2 | -- Round-trip string-escape tests: parse a chunk, serialize the AST back to
3 | -- Lua source, run that source, and check the resulting string value.
4 | local assert = require 'ext.assert'
5 | local Parser = require 'parser'
6 |
7 | -- Parse `src`, re-serialize it, evaluate the serialized chunk, and verify
8 | -- the value it returns equals `expected`.
9 | local function roundtrip(src, expected)
10 | local serialized = ''..Parser.parse(src) -- concat coerces the parsed AST back into source text
11 | print(src, serialized)
12 | local got = assert(load(serialized))() -- evaluate it ...
13 | assert.eq(got, expected) -- assert it's correct
14 | end
15 |
16 | -- Each case only runs when the host Lua is new enough for the escape form;
17 | -- _VERSION strings like 'Lua 5.3' compare correctly with plain string order.
18 | local cases = {
19 | -- decimal escape codes, since 5.1
20 | {'Lua 5.1', [[return '\97']], 'a'},
21 | -- hex escape codes, since 5.2; don't reuse the same letter as before in
22 | -- case of false positives, and make sure to cover hex-digit characters
23 | {'Lua 5.2', [[return '\x62']], 'b'},
24 | {'Lua 5.2', [[return '\x7a']], 'z'},
25 | -- unicode escapes, since 5.3
26 | {'Lua 5.3', [[return '\u{2200}']], '∀'},
27 | {'Lua 5.3', [[return '\u{2a01}']], '⨁'},
28 | }
29 | for _, case in ipairs(cases) do
30 | if _VERSION >= case[1] then
31 | roundtrip(case[2], case[3])
32 | end
33 | end
34 |
--------------------------------------------------------------------------------
/tests/validate.lua:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env lua
2 | --[[
3 | './validate.lua' = runs through the validation tests and verifies that the parser produces the correct output for the currently-running version of lua
4 | './validate.lua all' = to test all versions at once (provided you have all built and with their names matching etc ...)
5 | './validate.lua makekey' = to regenerate the key to stdout
6 | --]]
7 | require 'ext'	-- provides the globals used below: path, cmdline, extended table -- per usage in this file
8 | local luas = table{	-- interpreter binaries the key file covers, in key-column order
9 | 'lua5.0', -- lua 5.0.3
10 | 'lua5.1', -- lua 5.1.5
11 | 'lua5.2', -- lua 5.2.4 with LUA_COMPAT_ALL enabled
12 | 'lua5.3', -- lua 5.3.6 with LUA_COMPAT_5_2 enabled
13 | 'lua5.4', -- lua 5.4.7 with LUA_COMPAT_5_3 enabled
14 | 'luajit', -- luajit 2.1.x ... I think openresty variant ... with LUAJIT_ENABLE_LUA52COMPAT enabled
15 | -- (TODO luajit 2.0x vs 2.1x, vanilla vs openresty)
16 | }
17 | local tmp = path'tmp.lua'	-- scratch file handed to each external interpreter
18 | local lines = assert(path'validate-key.txt':read()):trim():split'\n'	-- one test snippet (plus key comment) per line
19 | local trimmedlines = lines:mapi(function(line) -- trim comments
20 | return (line:match'^(.-)%-%-.*$' or line):trim()	-- keep only the code before the first '--'
21 | end)
22 | local maxline = trimmedlines:mapi(function(line) return #line end):sup()	-- widest code column, used to align the regenerated key
23 |
24 | -- which to test? current version or all?
25 | local testluas
26 | if cmdline.all then	-- './validate.lua all'
27 | testluas = table(luas)
28 | else	-- default: just the interpreter currently running this script
29 | local version = _VERSION:match'^Lua (.*)$'
30 | --if version == '5.1' and jit then version = '5.2' end -- TODO more on luajit versions and COMPAT* builds and parser feature detection ...
31 | if jit then version = 'jit' end	-- luajit reports _VERSION 'Lua 5.1', so detect it via the jit global
32 | testluas = table{'lua'..version}
33 | end
34 |
35 | -- Walk every key line: either regenerate the key ('makekey') by asking each
36 | -- interpreter to load the snippet, or verify the parser agrees with both the
37 | -- recorded key and the real interpreter(s).
38 | for i,line in ipairs(lines) do
39 |
40 | -- [[ if we're making the key ...
41 | if cmdline.makekey then
42 | -- TODO more comprehensive on with/without COMPAT flags enabled
43 | local verstats = {}	-- map: interpreter name -> did it load the line successfully
44 | for _,lua in ipairs(luas) do
45 |
46 | tmp:write(line)
47 | local results = table.pack(os.execute(lua..' -e "assert(loadfile\''..tmp..'\')" > /dev/null 2>&1')) -- load, don't run
48 | local luaSuccess = not not results[1]
49 |
50 | if not luaSuccess and results[2] == 'signal' and results[3] ~= 1 then break end -- detect ctrl+c instead of syntax error ... this is not always picking it up
51 |
52 | verstats[lua] = luaSuccess
53 | -- [[ check my old key for bugs/changes
54 | -- bugfix: '$' only anchors when it's the last char of the whole pattern;
55 | -- inside the capture it was a literal '$', so this never matched anything
56 | local version = lua:match'^lua(%d%.%d)$'
57 | if version then
58 | -- bugfix: `not` binds tighter than `..`, so the old expression
59 | -- `not line:match'FAIL_'..version` concatenated a boolean (runtime error);
60 | -- use a plain (non-pattern) find so the '.' in version isn't a wildcard
61 | local expected = not line:find('FAIL_'..version, 1, true)
62 | -- bugfix: this compared against the undefined global `result`;
63 | -- the actual outcome of the load attempt is luaSuccess
64 | assert.eq(expected, luaSuccess)
65 | end
66 | --]]
67 | end
68 | local line = trimmedlines[i] -- don't need comments so use the comment-less version
69 | print(line..(' '):rep(maxline - #line + 10)..'--\t'..luas:mapi(function(lua)
70 | return lua..'='..tostring(verstats[lua] and 1 or 0)
71 | end):concat'\t')
72 |
73 | else
74 | --]]
75 | -- [[ if we're testing the parser ...
76 | for _,testlua in ipairs(testluas) do
77 | -- TODO remove the 'lua' prefix and TODO make sure this is compat with whatever the parser version input is ...")
78 |
79 | -- determine 'version' to pass to the parser
80 | -- TODO more on luajit versions and COMPAT* builds and parser feature detection ...
81 | local version = testlua:match'^lua(.*)$'
82 | if version == '5.1' and jit then version = '5.2' end
83 |
84 | -- what does the recorded key say this interpreter should do?
85 | local keySuccess = assert(
86 | -- TODO if we don't have it then ... regenerate it from the bin ... ? and maybe even re-write it out?
87 | line:match('lua'..version..'=(%d)'), "couldn't find lua version "..version
88 | ) ~= '0'
89 |
90 | -- ground truth: does a real interpreter accept this line?
91 | local luaSuccess
92 | if cmdline.all then
93 | tmp:write(line)
94 | local results = table.pack(os.execute(testlua..' -e "assert(loadfile\''..tmp..'\')" > /dev/null 2>&1')) -- load, don't run
95 | luaSuccess = not not results[1]
96 | else
97 | luaSuccess = not not (loadstring or load)(line)
98 | end
99 |
100 | local LuaParser = require 'parser.lua.parser'
101 | -- mannnn between parser.parse, Parser:init, Parser:setData, and parser/base/parser and parser/lua/parser, I need to clean up these function signatures
102 | local parser = LuaParser(nil, version, nil, testlua == 'luajit')
103 | local parseSuccess, errorString = parser:setData(line)
104 | parseSuccess = not not parseSuccess
105 | print('key results', keySuccess, 'parser results', parseSuccess, 'lua results', luaSuccess, 'line', line, 'version', version)
106 | if keySuccess ~= parseSuccess or parseSuccess ~= luaSuccess then
107 | -- bugfix: errorString is nil whenever the parse *succeeded* but disagreed
108 | -- with the key/interpreter; concatenating nil would mask the real failure
109 | error("parser failed to recreate same results. error="..tostring(errorString))
110 | end
111 | end
112 | end
113 | --]]
114 | end
115 | tmp:remove()
116 |
--------------------------------------------------------------------------------