├── COPYRIGHT ├── Makefile ├── README ├── doc ├── doc.css ├── grammar.html ├── index.html ├── lua.gif └── parser.html ├── examples ├── comment-extraction.lua ├── local-imports.lua ├── metalua-ast.lua ├── switch-macro.lua ├── syntax-highlighting.lua └── vararg-indexing.lua ├── release ├── rockspec ├── leg-0.1.2-1.rockspec └── leg-scm-1.rockspec ├── src ├── grammar.lua ├── init.lua └── parser.lua └── tests ├── test.lua └── test_parser.lua /COPYRIGHT: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keplerproject/leg/dcd25ff58eab815cb158afcf4cc541281bc36408/COPYRIGHT -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Makefile for Leg 3 | # $Id: Makefile,v 1.8 2007/12/07 14:23:56 hanjos Exp $ 4 | # 5 | 6 | # ===== LUA PATHS ================= 7 | # Lua's library directory 8 | LUA_LIB = /usr/local/share/lua/5.1 9 | 10 | # ===== PROJECT INFO ============== 11 | # project info 12 | NAME = leg 13 | VERSION = 0.2 14 | 15 | # project directories 16 | DOC_DIR = doc 17 | SRC_DIR = src 18 | TEST_DIR = tests 19 | 20 | # the document generator used to build the documentation in doc/. 21 | # It uses an unreleased generator, so this is for my personal use. 
22 | DOCCER = lua /usr/local/share/lua/5.1/ldt/ldt.lua 23 | 24 | # ===== RULES ===================== 25 | leg: 26 | mkdir -p $(NAME) 27 | rm -f $(NAME)/*.lua 28 | cp src/*.lua $(NAME) 29 | 30 | install: 31 | # copying the source files to LUA_LIB 32 | mkdir -p $(LUA_LIB)/$(NAME) 33 | rm -f $(LUA_LIB)/$(NAME)/*.lua 34 | cp -r src/* $(LUA_LIB)/$(NAME) 35 | 36 | clean: 37 | # removing the source files and package 38 | rm -r $(LUA_LIB)/$(NAME) 39 | 40 | documents: 41 | # generate documentation 42 | mkdir -p $(DOC_DIR) 43 | $(DOCCER) src/*.lua 44 | mv ldt/* $(DOC_DIR) 45 | rm -rf ldt 46 | 47 | bundle: 48 | # tar-ing it (this works only with version 1.19 and beyond, due 49 | # the --exclude-vcs flag) 50 | tar --create --verbose --exclude-vcs --file=../$(NAME)-$(VERSION).tar ../$(NAME) 51 | 52 | # zipping it 53 | gzip ../$(NAME)-$(VERSION).tar 54 | 55 | test: 56 | cd tests; lua test.lua 57 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Leg 0.1.2 2 | ========= 3 | 4 | This is a release of Leg, a Lua library exporting a complete Lua 5.1 grammar 5 | and a small API for user manipulation. 6 | 7 | 8 | Dependencies 9 | ------------ 10 | 11 | * You need, understandably, to have Lua 5.1 up and running to be able to use 12 | this library :) 13 | 14 | * Also, Leg uses LPeg 0.7 extensively for pattern matching, so LPeg is 15 | expected be installed. You can get LPeg at 16 | 17 | http://www.inf.puc-rio.br/~roberto/lpeg.html 18 | 19 | 20 | Basic Installation 21 | ------------------ 22 | 23 | There are three ways to install Leg: 24 | 25 | * `make install` 26 | 27 | Run `make install`, and a directory called `leg` with the source files inside 28 | will be put in a specific path. Tweak Makefile's LUA_LIB variable to 29 | indicate the appropriate path for your system; Makefile ships with it set 30 | to /usr/local/share/lua/5.1 . 
Make sure you have the proper permissions to 31 | access the path you want; if not or in doubt, use the `make` option below. 32 | 33 | * `make` or `make leg` 34 | 35 | A directory `leg` will be created in your working directory, with the 36 | source files inside. Just put it in a LUA_PATH-visible place and you're 37 | ready to go. 38 | 39 | * by hand 40 | 41 | If you don't have, don't want to or can't use `make`, you can just put all 42 | the files in `src` inside a directory called `leg`, and put that directory 43 | in your LUA_PATH. 44 | 45 | 46 | Read the Lua Reference Manual for the LUA_PATH and the LUA_CPATH syntax 47 | (http://www.lua.org/manual/5.1/manual.html#pdf-package.path). 48 | 49 | 50 | Copyright 51 | --------- 52 | 53 | See the file "COPYRIGHT". 54 | 55 | 56 | Bug Fixes and New Stuff 57 | ----------------------- 58 | 59 | See the file "release". 60 | 61 | 62 | Testing 63 | ------- 64 | 65 | You can either: 66 | 67 | * run `make test`; or 68 | 69 | * go to the directory `tests` and run `test.lua`. 70 | 71 | Both do the same thing. 72 | 73 | 74 | Work to do 75 | ---------- 76 | 77 | * Improve error checking: currently it is bolted on and not extensible, and 78 | different patterns react differently to mismatching: scanner.STRING throws 79 | an error when a mismatch happens, but some errors simply return false and an 80 | error message. I don't know a good way to handle this. 81 | 82 | * A better API for grammar extensions. The current one is very ad hoc, and 83 | requires some fine tuning to make sure it works correctly. Metalua's API 84 | seems interesting, and was originally based on Parsec. Investigation is 85 | under way. 86 | 87 | * The binary operators' precedences isn't enforced in the grammar; one must 88 | enforce it by hand when capturing expressions. This can be very annoying. 89 | 90 | * More thorough testing. 
91 | -------------------------------------------------------------------------------- /doc/doc.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin-left: 1em; 3 | margin-right: 1em; 4 | font-family: Arial, geneva, serif; 5 | background-color:#ffffff; margin:0px; 6 | font-size: 11pt; 7 | } 8 | body, td, th { color:#000000; } 9 | 10 | code { 11 | font-family: "Andale Mono", monospace; 12 | } 13 | 14 | .example { 15 | background-color: rgb(245, 245, 245); 16 | border-top-width: 1px; 17 | border-right-width: 1px; 18 | border-bottom-width: 1px; 19 | border-left-width: 1px; 20 | border-top-style: solid; 21 | border-right-style: solid; 22 | border-bottom-style: solid; 23 | border-left-style: solid; 24 | border-top-color: silver; 25 | border-right-color: silver; 26 | border-bottom-color: silver; 27 | border-left-color: silver; 28 | padding: 1em; 29 | margin-left: 1em; 30 | margin-right: 1em; 31 | font-family: "Andale Mono", monospace; 32 | font-size: smaller; 33 | } 34 | 35 | tt { 36 | font-family: "Andale Mono", monospace; 37 | } 38 | 39 | a { body, td, th { font-size: 11pt; } } 40 | 41 | h1 { font-size:1.5em; } 42 | h2 { font-size:1.25em; } 43 | h3 { font-size:1.00em; } 44 | h3 { padding-top: 1em; } 45 | h4 { font-size:0.90em; } 46 | /* 47 | h1, h2, h3, h4 { margin-left: 0em; } 48 | #product_logo 49 | { 50 | } 51 | 52 | #product_name 53 | { 54 | } 55 | 56 | #product_description 57 | { 58 | } 59 | 60 | */ 61 | 62 | textarea, pre, tt { font-size:10pt; } 63 | small { font-size:0.85em; } 64 | a:link { font-weight:bold; color: #004080; text-decoration: none; } 65 | a:visited { font-weight:bold; color: #006699; text-decoration: none; } 66 | a:link:hover { text-decoration:underline; } 67 | hr { color:#cccccc } 68 | img { border-width: 0px; } 69 | 70 | p { margin-left: 1em; } 71 | 72 | p.name { 73 | font-family: "Andale Mono", monospace; 74 | padding-top: 1em; 75 | margin-left: 0em; 76 | } 77 | 78 | blockquote { margin-left: 
3em; } 79 | 80 | hr { 81 | margin-left: 0em; 82 | background: #00007f; 83 | border: 0px; 84 | height: 1px; 85 | } 86 | 87 | ul { list-style-type: disc; } 88 | 89 | table.index { border: 1px #00007f; } 90 | table.index td { text-align: left; vertical-align: top; } 91 | table.index ul { padding-top: 0em; margin-top: 0em; } 92 | 93 | table.stats { 94 | font-size: 8pt; 95 | border: 3px solid black; 96 | margin-left: 0px; 97 | margin-right: 0px; 98 | float: right; 99 | clear: right; 100 | } 101 | 102 | table { 103 | border: 1px solid black; 104 | border-collapse: collapse; 105 | margin-left: auto; 106 | margin-right: auto; 107 | } 108 | th { 109 | border: 1px solid black; 110 | padding: 0.5em; 111 | } 112 | td { 113 | border: 1px solid black; 114 | padding: 0.5em; 115 | } 116 | div.header, div.footer { margin-left: 0em; } 117 | 118 | #container 119 | { 120 | margin-left: 1em; 121 | margin-right: 1em; 122 | background-color: #f0f0f0; 123 | } 124 | 125 | #product 126 | { 127 | text-align: center; 128 | border-bottom: 1px solid #cccccc; 129 | background-color: #ffffff; 130 | } 131 | 132 | #product big { 133 | font-size: 2em; 134 | } 135 | 136 | #main 137 | { 138 | background-color: #f0f0f0; 139 | border-left: 2px solid #cccccc; 140 | } 141 | 142 | #navigation 143 | { 144 | float: left; 145 | width: 12em; 146 | margin: 0; 147 | vertical-align: top; 148 | background-color: #f0f0f0; 149 | overflow:visible; 150 | } 151 | 152 | #navigation h1 { 153 | background-color:#e7e7e7; 154 | font-size:1.1em; 155 | color:#000000; 156 | text-align:left; 157 | margin:0px; 158 | padding:0.2em; 159 | border-top:1px solid #dddddd; 160 | border-bottom:1px solid #dddddd; 161 | } 162 | 163 | #navigation ul 164 | { 165 | font-size:1em; 166 | list-style-type: none; 167 | padding: 0; 168 | margin: 1px; 169 | } 170 | 171 | #navigation li 172 | { 173 | text-indent: -1em; 174 | margin: 0em 0em 0em 0.5em; 175 | display: block; 176 | padding: 3px 0px 0px 12px; 177 | } 178 | 179 | #navigation li li a 180 
| { 181 | padding: 0px 3px 0px -1em; 182 | } 183 | 184 | #content 185 | { 186 | margin-left: 12em; 187 | padding: 1em; 188 | border-left: 2px solid #cccccc; 189 | border-right: 2px solid #cccccc; 190 | background-color: #ffffff; 191 | } 192 | 193 | #about 194 | { 195 | clear: both; 196 | margin: 0; 197 | padding: 5px; 198 | border-top: 2px solid #cccccc; 199 | background-color: #ffffff; 200 | } 201 | 202 | @media print { 203 | body { 204 | font: 10pt "Times New Roman", "TimeNR", Times, serif; 205 | } 206 | a { font-weight:bold; color: #004080; text-decoration: underline; } 207 | 208 | #main { background-color: #ffffff; border-left: 0px; } 209 | #container { margin-left: 2%; margin-right: 2%; background-color: #ffffff; } 210 | 211 | #content { margin-left: 0px; padding: 1em; border-left: 0px; border-right: 0px; background-color: #ffffff; } 212 | 213 | #navigation { display: none; 214 | } 215 | 216 | #product_logo 217 | { 218 | display: none; 219 | } 220 | 221 | #about img 222 | { 223 | display: none; 224 | } 225 | 226 | .example { 227 | font-family: "Andale Mono", monospace; 228 | font-size: 8pt; 229 | page-break-inside: avoid; 230 | } 231 | } 232 | -------------------------------------------------------------------------------- /doc/grammar.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | grammar 6 | 7 | 8 | 9 | 10 | 11 |
12 | 13 |
14 |

15 |

18 |
grammar
19 |
LPeg grammar manipulation
20 |
21 | 22 |
Version: 0.2
23 |
Generated: December 07, 2007
24 | 25 |
26 | 27 |
28 | 29 | 114 | 115 |
116 | 117 | 118 | 119 |

120 | 121 | 122 |

123 |

Description

124 |

125 | This module defines a handful of operations which can be applied to 126 | LPeg patterns and grammars in 127 | general. 128 |

129 |

Dependencies

130 | 132 |

133 |

Operations

134 |

135 |

Piping

136 |

137 | Pattern matching dissociates the notion of matching from the notion of 138 | capturing: matching checks if a given string follows a certain pattern, 139 | and capturing generates values according to the match made. This division 140 | allows interesting possibilities: 141 |

    142 |
  • different problems can be solved by applying different captures to the same grammar;
  • 143 |
  • captures may be defined separately;
  • 144 |
  • captures may be done on top of other captures.
145 |

146 | Accounting for the first and second bullets, the grammar given in 147 | parser has no captures, enabling the user to reuse it to solve any 148 | problems that require a Lua grammar. One good example is documentation 149 | generation, described in a little more detail below. 150 |

151 | The third bullet depicts a more interesting idea: a capture might take the 152 | result of another capture as input, doing a further transformation of the 153 | original data. This capture chaining, with the latter ones using the former's 154 | output as its input, is very similar to Unix pipelines, 155 | so this mechanism was named piping. 156 |

157 |

Completing

158 |

159 | With piping, several levels of captures can be chained together up to the 160 | most appropriate for the task at hand. Yet some levels might require extra rules, and modifications to existing ones, to ensure proper matching. 161 |

162 | To avoid manual copying, the new grammar should redefine only the necessary 163 | rules, copying the rest from the older grammar. This action is dubbed 164 | completing. 165 |

166 |

Applying

167 |

168 | Once a new rule set is created and completed, and 169 | all captures are correctly piped, all that's left is 170 | to put them together, a process called applying. The result is a grammar ready for lpeg.P 171 | consumption, whose pattern will return the intended result when a match is made. 172 |

173 |

Example

174 |

175 | Let's consider the problem of documenting a Lua module. In this case, comments 176 | must be captured before every function declaration when in the outermost scope: 177 |

178 |

179 |  -- the code to parse
180 | subject = [[
181 |  -- Calculates the sum a+b. 
182 |  -- An extra line.
183 |  function sum (a, b)
184 |  -- code
185 |  end
186 | 
187 |  -- f1: assume a variable assignment is not a proper declaration for an 
188 |  -- exported function
189 |  f1 = function ()
190 |  -- code
191 |  end
192 | 
193 |  while true do
194 |    -- this function is not in the outermost scope
195 |    function aux() end
196 |  end
197 |  
198 |  function something:other(a, ...)
199 |    -- a global function without comments
200 |  end
201 | ]]
202 | 
203 |

204 | In the code above only sum and something:other should be documented, as f1 isn't properly (by our standards) declared and aux is not in the outermost scope. 205 |

206 | By combining LPeg and the modules parser and grammar, this specific problem can be solved as follows: 207 |

208 |

209 |  -- ye olde imports
210 | local parser, grammar = require 'leg.parser', require 'leg.grammar'
211 | local lpeg = require 'lpeg'
212 | 
213 |  -- a little aliasing never hurt anyone
214 | local P, V = lpeg.P, lpeg.V
215 | 
216 |  -- change only the initial rule and make no captures
217 | patt = grammar.apply(parser.rules, parser.COMMENT^-1 * V'GlobalFunction', nil)
218 | 
219 |  -- transform the new grammar into a LPeg pattern
220 | patt = P(patt)
221 | 
222 |  -- making a pattern that matches any Lua statement, also without captures
223 | Stat = P( grammar.apply(parser.rules, V'Stat', nil) )
224 | 
225 |  -- a pattern which matches function declarations and skips statements in
226 |  -- inner scopes or undesired tokens
227 | patt = (patt + Stat + parser.ANY)^0
228 | 
229 |  -- matching a string
230 | patt:match(subject)
231 | 
232 |

233 | These are the relevant rules in the grammar: 234 |

235 |

236 | GlobalFunction = 'function' * FuncName * FuncBody
237 | FuncName     = ID * ('.' * ID)^0 * (':' * ID)^-1
238 | FuncBody     = '(' * (ParList + EPSILON) * ')' * Block * 'end'
239 | ParList      = NameList * (',' * '...')^-1
240 | NameList     = ID * (',' * ID)^0
241 | ID           = parser.IDENTIFIER
242 | EPSILON      = P(true)
243 | 
244 |

245 | It may seem that ParList + EPSILON could be substituted for ParList^-1 (optionally match ParList), but then no captures would be made for empty parameter lists, and GlobalFunction would get all strings matched by FuncBody. The EPSILON rule acts in this manner as a placeholder in the argument list, avoiding any argument list processing in the capture function. 246 |

247 | Since no captures are being made, lpeg.match doesn't return anything interesting. Here are some possible captures: 248 |

249 |

250 |  -- some interesting captures bundled up in a table. Note that the table keys
251 |  -- match the grammar rules we want to add captures to. Whatever rules aren't in
252 |  -- the rules table below will come from parser.rules .
253 | captures = {
254 |  [1] = function (...)  -- the initial rule
255 |    return '<function>'..table.concat{...}..'</function>' 
256 |  end,
257 |  
258 |  GlobalFunction = function (name, parlist)
259 |    return '<name>'..name..'</name><parlist>'..(parlist or '')..'</parlist>' 
260 |  end,
261 |  
262 |  FuncName = grammar.C,  -- capture the raw text
263 |  ParList  = grammar.C,  -- capture the raw text
264 |  COMMENT  = parser.comment2text,  -- remove the comment trappings
265 | }
266 | 
267 |  -- spacing rule
268 | local S = parser.SPACE ^ 0
269 | 
270 |  -- rules table
271 | rules = {
272 |  [1]     = ((V'COMMENT' *S) ^ 0) *S* V'GlobalFunction',
273 |  COMMENT = parser.COMMENT,
274 | }
275 | 
276 |  -- building the new grammar and adding the captures
277 | patt = P( grammar.apply(parser.rules, rules, captures) )
278 | 
279 |  -- a pattern that matches a sequence of patts and concatenates the results
280 | patt = (patt + Stat + parser.ANY)^0 / function(...) 
281 |  return table.concat({...}, '\n\n')  -- some line breaks for easier reading
282 | end
283 | 
284 |  -- finally, matching a string
285 | print(patt:match(subject))
286 | 
287 |

288 | FuncBody needs no captures, as Block and all its non-terminals have none; it 289 | just needs to pass along any captures made by ParList. NameList and ID also have no captures, and the whole subject string is passed further. 290 |

291 | The printed result is: 292 |

293 | <function>Calculates the sum a+b. An extra line.<name>sum</name><parlist>a, b</parlist></function>
294 | 
295 | <function><name>something:other</name><parlist>a, ...</parlist></function> 296 |
297 |

298 |

299 | 300 | 301 | 302 | 303 | 304 |


305 |

Functions

306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 |
anyOf (t) Returns a pattern which matches any of the patterns in t.
anywhere (patt) Returns a pattern which searches for the pattern patt anywhere in a string.
apply (grammar, rules, captures) Completes rules with grammar and then applies captures.
C () A capture function, made so that patt / C is equivalent to m.C(patt). It's intended to be used in capture tables, such as those required by pipe and apply.
complete (dest, orig) Completes dest with orig.
copy (grammar) Creates a shallow copy of grammar.
Ct () A capture function, made so that patt / Ct is equivalent to m.Ct(patt). It's intended to be used in capture tables, such as those required by pipe and apply.
listOf (patt, sep) Returns a pattern which matches a list of patts, separated by sep.
oneOf (list) Returns a pattern which matches any of the patterns in list.
pipe (dest, orig) Pipes the captures in orig to the ones in dest.
pmatch (patt) Returns a pattern which simply fails to match if an error is thrown during the matching.
375 | 376 | 377 |

378 |


anyOf (t) 379 |
    Returns a pattern which matches any of the patterns in t. 380 |

    381 | The iterator pairs is used to traverse t, so no particular traversal order 382 | is guaranteed. Use oneOf to ensure sequential matching 383 | attempts. 384 |

    385 | Example: 386 |

    387 | local g, p, m = require 'leg.grammar', require 'leg.parser', require 'lpeg'
    388 | 
    389 |  -- match numbers or operators, capture the numbers
    390 | print( (g.anyOf { '+', '-', '*', '/', m.C(p.NUMBER) }):match '34.5@23 * 56 / 45 - 45' )
    391 |  --> prints 34.5
    392 | 
    393 |

    394 | Parameters:

      395 |
    • t: a table with LPeg patterns as values. The keys are ignored.
    396 |

    397 | Returns:

      398 |
    • a pattern which matches any of the patterns received.
399 | 400 |

401 |


anywhere (patt) 402 |
    Returns a pattern which searches for the pattern patt anywhere in a string. 403 |

    404 | This code was extracted from the LPeg home page, in the examples section. 405 |

    406 | Parameters:

      407 |
    • patt: a LPeg pattern.
    408 |

    409 | Returns:

      410 |
    • a LPeg pattern which searches for patt anywhere in the string.
411 | 412 |

413 |


apply (grammar, rules, captures) 414 |
    Completes rules with grammar and then applies captures. 415 |

    416 | rules can either be:

      417 |
    • a single pattern, which is taken to be the new initial rule,
    • 418 |
    • a possibly incomplete LPeg grammar table, as per complete, or
    • 419 |
    • nil, which means no new rules are added.
    420 |

    421 | captures can either be:

      422 |
    • a capture table, as per pipe, or
    • 423 |
    • nil, which means no captures are applied.
    424 |

    425 | Parameters:

      426 |
    • grammar: the old grammar. It stays unmodified.
    • 427 |
    • rules: optional, the new rules.
    • 428 |
    • captures: optional, the final capture table.
    429 |

    430 | Returns:

      431 |
    • rules, suitably augmented by grammar and captures.
432 | 433 |

434 |


C () 435 |
    A capture function, made so that patt / C is equivalent to m.C(patt). It's intended to be used in capture tables, such as those required by pipe and apply.
436 | 437 |

438 |


complete (dest, orig) 439 |
    Completes dest with orig. 440 |

    441 | Parameters:

      442 |
    • dest: the new grammar. Must be a table.
    • 443 |
    • orig: the original grammar. Must be a table.
    444 |

    445 | Returns:

      446 |
    • dest, with new rules inherited from orig.
447 | 448 |

449 |


copy (grammar) 450 |
    Creates a shallow copy of grammar. 451 |

    452 | Parameters:

      453 |
    • grammar: a regular table.
    454 |

    455 | Returns:

      456 |
    • a newly created table, with grammar's keys and values.
457 | 458 |

459 |


Ct () 460 |
    A capture function, made so that patt / Ct is equivalent to m.Ct(patt). It's intended to be used in capture tables, such as those required by pipe and apply.
461 | 462 |

463 |


listOf (patt, sep) 464 |
    Returns a pattern which matches a list of patts, separated by sep. 465 |

    466 | Example: matching comma-separated values: 467 |

    468 | local g, m = require 'leg.grammar', require 'lpeg'
    469 | 
    470 |  -- separator
    471 | local sep = m.P',' + m.P'\n'
    472 | 
    473 |  -- element: anything but sep, capture it
    474 | local elem = m.C((1 - sep)^0)
    475 | 
    476 |  -- pattern
    477 | local patt = g.listOf(elem, sep)
    478 | 
    479 |  -- matching
    480 | print( patt:match [[a, b, 'christmas eve'
    481 |  d, evening; mate!
    482 |  f]])
    483 |  --> prints out "a        b       'christmas eve'  d        evening; mate! f"
    484 | 
    485 |

    486 | Parameters:

      487 |
    • patt: a LPeg pattern.
    • 488 |
    • sep: a LPeg pattern.
    489 |

    490 | Returns:

      491 |
    • the following pattern:
      patt * (sep * patt)^0
492 | 493 |

494 |


oneOf (list) 495 |
    Returns a pattern which matches any of the patterns in list. 496 |

    497 | Differently from anyOf, this function ensures sequential 498 | traversing. 499 |

    500 | Parameters:

      501 |
    • list: a list of LPeg patterns.
    502 |

    503 | Returns:

      504 |
    • a pattern which matches any of the patterns received.
505 | 506 |

507 |


pipe (dest, orig) 508 |
    Pipes the captures in orig to the ones in dest. 509 |

    510 | dest and orig should be tables, with each key storing a capture function. Each capture in dest will be altered to use the results for the matching one in orig as input, using function composition. Should orig possess keys not in dest, dest will copy them. 511 |

    512 | Parameters:

      513 |
    • dest: a capture table.
    • 514 |
    • orig: a capture table.
    515 |

    516 | Returns:

      517 |
    • dest, suitably modified.
518 | 519 |

520 |


pmatch (patt) 521 |
    Returns a pattern which simply fails to match if an error is thrown during the matching. 522 |

    523 | One usage example is parser.NUMBER. Originally it threw an error when trying to match a malformed number (such as 1e23e4), since in this case the input is obviously invalid and the pattern would be part of the Lua grammar. So pmatch is used to catch the error and return nil (signalling a non-match) and the error message. 524 |

    525 | Parameters:

      526 |
    • patt: a LPeg pattern.
    527 |

    528 | Returns:

      529 |
    • a pattern which catches any errors thrown during the matching and simply doesn't match instead of propagating the error.
530 | 531 |
532 | 533 | 534 | 535 | 536 |
537 |
538 |
539 | 540 | 541 | -------------------------------------------------------------------------------- /doc/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Leg 6 | 7 | 8 | 9 | 10 | 11 |
12 | 13 |
14 |

15 |

18 |
Leg
19 |
LPeg-powered Lua 5.1 grammar
20 |
21 | 22 |
Version: 0.2
23 |
Generated: December 07, 2007
24 | 25 |
26 | 27 |
28 | 29 | 84 | 85 |
86 | 87 | 88 | 89 |

90 | 91 | 92 |

93 |

Overview

94 |

95 | Leg is a Lua library which offers a complete Lua 5.1 grammar, along with some functions to use and modify it. Some examples of projects which could benefit from Leg are a syntax highlighter, a Luadoc-style document generator, and a macro preprocessor. 96 |

97 | Leg uses LPeg for pattern matching, and returns LPeg patterns for user manipulation. 98 |

99 | Leg is available under the same license as Lua 5.1. 100 |

101 |

Dependencies

102 | 104 |

105 |

Download

106 |

107 | Leg can be downloaded from its LuaForge page. 108 |

109 |

Credits

110 |

111 | This project is maintained by Humberto Anjos, and was adapted from an earlier project done with Francisco Sant'Anna. 112 |

113 |

License

114 |

115 | Copyright © 2007 Humberto Saraiva Nazareno dos Anjos. 116 |

117 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 118 |

119 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 120 |

121 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 122 |

123 |

124 |

125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 |

134 |
135 |
136 | 137 | 138 | -------------------------------------------------------------------------------- /doc/lua.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keplerproject/leg/dcd25ff58eab815cb158afcf4cc541281bc36408/doc/lua.gif -------------------------------------------------------------------------------- /doc/parser.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | parser 6 | 7 | 8 | 9 | 10 | 11 |
12 | 13 |
14 |

15 |

18 |
parser
19 |
Lua 5.1 parser
20 |
21 | 22 |
Version: 0.2
23 |
Generated: December 07, 2007
24 | 25 |
26 | 27 |
28 | 29 | 120 | 121 |
122 | 123 | 124 | 125 |

126 | 127 | 128 |

129 |

Description

130 |

131 | Exports Lua 5.1's lexical patterns and syntactic rules. 132 |

133 |

Dependencies

134 | 137 |

138 |

The Grammar

139 |

140 | The rules variable implements the official Lua 5.1 grammar. It includes all lexical rules, as well as the CHUNK rule, which matches a complete Lua source file. 141 |

142 | rules is a table with open references, not yet a LPeg pattern; to create a pattern, it must be given to lpeg.P. This is done to enable users to modify the grammar to suit their particular needs. grammar provides a small API for this purpose. 143 |

144 | The code below shows the Lua 5.1 grammar in LPeg, minus spacing issues. 145 |

146 | The following convention is used for rule names:

    147 |
  • TOKENRULE: token rules (which represent terminals) are in upper case when applicable (ex. +, WHILE, NIL, ..., THEN, {, ==).
  • 148 |
  • GrammarRule: the main grammar rules (non-terminals): Examples are Chunk, FuncName, BinOp, and TableConstructor.
  • 149 |
  • _GrammarRule: subdivisions of the main rules, introduced to ease captures. Examples are _SimpleExp, _PrefixExpParens and _FieldExp.
  • 150 |
  • METARULE: grammar rules with a special semantic meaning, to be used for capturing in later modules, like BOF, EOF and EPSILON.
151 |

152 |

153 | rules = {
154 |    -- See peculiarities below
155 |    EPSILON = lpeg.P(true)
156 |    EOF     = EOF  -- end of file rule
157 |    BOF     = BOF  -- beginning of file rule
158 |    Name    = ID
159 | 
160 |    -- Default initial rule
161 |    [1]     = CHUNK
162 |    CHUNK   = BANG^-1 * Block
163 | 
164 |    Chunk   = (Stat * ';'^-1)^0 * (LastStat * ';'^-1)^-1
165 |    Block   = Chunk
166 | 
167 |    -- STATEMENTS
168 |    Stat          = Assign + FunctionCall + Do + While + Repeat + If
169 |                  + NumericFor + GenericFor + GlobalFunction + LocalFunction
170 |                  + LocalAssign
171 |    Assign        = VarList * '=' * ExpList
172 |    Do            = 'do' * Block * 'end'
173 |    While         = 'while' * Exp * 'do' * Block * 'end'
174 |    Repeat        = 'repeat' * Block * 'until' * Exp
175 |    If            = 'if' * Exp * 'then' * Block
176 |                      * ('elseif' * Exp * 'then' * Block)^0
177 |                      * (('else' * Block) + EPSILON)
178 |                      * 'end'
179 |    NumericFor    = 'for' * Name * '='
180 |                      * Exp * ',' * Exp * ((',' * Exp) + EPSILON)
181 |                      * 'do' * Block * 'end'
182 |    GenericFor    = 'for' * NameList * 'in' * ExpList * 'do' * Block * 'end'
183 |    GlobalFunction = 'function' * FuncName * FuncBody
184 |    LocalFunction = 'local' * 'function' * Name * FuncBody
185 |    LocalAssign   = 'local' * NameList * ('=' * ExpList)^-1
186 |    LastStat      = 'return' * ExpList^-1
187 |                  + 'break'
188 | 
189 |    -- LISTS
190 |    VarList  = Var * (',' * Var)^0
191 |    NameList = Name * (',' * Name)^0
192 |    ExpList  = Exp * (',' * Exp)^0
193 | 
194 |    -- EXPRESSIONS
195 |    Exp          = _SimpleExp * (BinOp * _SimpleExp)^0
196 |    _SimpleExp   = 'nil' + 'false' + 'true' + Number + String + '...' + Function
197 |                 + _PrefixExp + TableConstructor + (UnOp * _SimpleExp)
198 |    _PrefixExp   = ( Name                -- a Var
199 |                   + _PrefixExpParens    -- only an expression
200 |                   ) * (
201 |                       _PrefixExpSquare  -- a Var
202 |                     + _PrefixExpDot     -- a Var
203 |                     + _PrefixExpArgs    -- a FunctionCall
204 |                     + _PrefixExpColon   -- a FunctionCall
205 |                   ) ^ 0
206 | 
207 |    -- Extra rules for semantic actions:
208 |    _PrefixExpParens = '(' * Exp * ')'
209 |    _PrefixExpSquare = '[' * Exp * ']'
210 |    _PrefixExpDot    = '.' * ID
211 |    _PrefixExpArgs   = Args
212 |    _PrefixExpColon  = ':' * ID * _PrefixExpArgs
213 | 
214 |    -- These rules use an internal trick to be distinguished from _PrefixExp
215 |    Var              = _PrefixExp
216 |    FunctionCall     = _PrefixExp
217 | 
218 |    -- FUNCTIONS
219 |    Function     = 'function' * FuncBody
220 |    FuncBody     = '(' * (ParList+EPSILON) * ')' * Block * 'end'
221 |    FuncName     = Name * _PrefixExpDot^0 * ((':' * ID)+EPSILON)
222 |    Args         = '(' * (ExpList+EPSILON) * ')'
223 |                 + TableConstructor + String
224 |    ParList      = NameList * (',' * '...')^-1
225 |                 + '...'
226 | 
227 |    -- TABLES
228 |    TableConstructor = '{' * (FieldList+EPSILON) * '}'
229 |    FieldList        = Field * (FieldSep * Field)^0 * FieldSep^-1
230 |    FieldSep         = ',' + ';'
231 | 
232 |    -- Extra rules for semantic actions:
233 |    _FieldSquare     = '[' * Exp * ']' * '=' * Exp
234 |    _FieldID         = ID * '=' * Exp
235 |    _FieldExp        = Exp
236 | 
237 |    -- OPERATORS
238 |    BinOp    = '+' + '-' + '*' + '/' + '^' + '%' + '..'
239 |             + '<' + '<=' + '>' + '>=' + '==' + '~='
240 |             + 'and' + 'or'
241 |    UnOp     = '-' + 'not' + '#'
242 | 
243 |    -- IDENTIFIERS
244 |  , ID      = ([_A-Za-z] * ([0-9_A-Za-z])^0) - Keyword
245 |  , Keyword = ...  -- any of Lua's keywords
246 |  , Symbol  = ...  -- any of Lua's symbols
247 |  
248 |    -- ...plus a rule for each keyword and symbol
249 | }
250 | 
251 |

252 | The implementation has certain peculiarities that merit clarification: 253 |

    254 |
  • Spacing is matched only between two tokens in a rule, never at the beginning or the end of a rule.
255 |
    256 |
  • EPSILON matches the empty string, which means that it always succeeds without consuming input. Although rule + EPSILON can be changed to rule^-1 without any loss of syntactic power, EPSILON was introduced in the parser due to its usefulness as a placeholder for captures.
257 |
    258 |
  • BOF and EOF are rules used to mark the bounds of a parsing match, and are useful for semantic actions.
259 |
    260 |
  • Name versus ID: the official Lua grammar doesn't distinguish between them, as their syntax is exactly the same (Lua identifiers). But semantically Name is a variable identifier, and ID is used with different meanings in _FieldID, FuncName, _PrefixExpColon and _PrefixExpDot.
261 |
    262 |
  • In Lua's original extended BNF grammar, Var and FunctionCall are defined using left recursion, which is unavailable in PEGs. In this implementation, the problem was solved by modifying the PEG rules to eliminate the left recursion, and by setting some markers (with some LPeg chicanery) to ensure the proper pattern is being used.
263 |

264 |

265 | 266 | 267 | 268 |


269 |

Variables

270 | 271 | 272 | 273 | 274 | 275 | 277 | 278 | 279 | 280 | 281 | 282 | 284 | 285 | 286 | 287 | 288 | 289 | 291 | 292 | 293 | 294 | 295 | 296 | 298 | 299 | 300 | 301 | 302 | 303 | 305 | 306 | 307 | 308 | 309 | 310 | 312 | 313 | 314 | 315 | 316 | 317 | 319 | 320 | 321 | 322 | 323 | 324 | 326 | 327 | 328 | 329 | 330 | 331 | 333 | 334 | 335 | 336 | 337 | 338 | 342 | 343 | 344 | 345 | 346 | 347 | 349 | 350 | 351 | 352 | 353 | 354 | 356 | 357 | 358 | 359 | 360 | 361 | 363 | 364 | 365 | 366 | 367 | 368 | 370 | 371 | 372 |
ANY = LPeg pattern 
Matches any token, comment or space. 276 |
BANG = LPeg pattern 
Matches UNIX's shebang (e.g. #!/usr/bin/lua). 283 |
BOF = LPeg pattern 
Matches the beginning of a file. 290 |
COMMENT = LPeg pattern 
Matches any Lua comment. 297 |
EOF = LPeg pattern 
Matches the end of a file. 304 |
IDENTIFIER = LPeg pattern 
A pattern which matches any Lua identifier. 311 |
IGNORED = LPeg pattern 
Matches everything ignored by the parser. 318 |
KEYWORD = LPeg pattern 
A pattern which matches any Lua keyword. 325 |
NUMBER = LPeg pattern 
Matches any Lua number. 332 |
rules = table 

339 | A table holding the Lua 5.1 grammar. See The Grammar for an extended explanation. 340 |

341 |

SPACE = LPeg pattern 
Matches any space character. 348 |
STRING = LPeg pattern 
Matches any Lua string. 355 |
SYMBOL = LPeg pattern 
A pattern which matches any Lua symbol. 362 |
TOKEN = LPeg pattern 
Matches any Lua identifier, keyword, symbol, number or string. 369 |
373 | 374 | 375 | 376 |
377 |

Functions

378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 |
apply (extraRules, captures) Uses grammar.apply to return a new grammar, with captures and extra rules. rules stays unmodified.
check (input) Checks if input is valid Lua source code.
comment2text (comment) Strips all prefixing -- and enclosing --[=*[ from comment tokens.
string2text (str) Strips all enclosing ', ", and [=*[ from string tokens, and processes escape characters.
text2comment (text) Encloses the text with comment markers.
text2string (text) Transforms a text into a syntactically valid Lua string. Similar to string.format with the '%q' option, but inserting escape numbers and escape codes where applicable.
417 | 418 | 419 |

420 |


apply (extraRules, captures) 421 |
    Uses grammar.apply to return a new grammar, with captures and extra rules. rules stays unmodified. 422 |

    423 | Parameters:

      424 |
    • extraRules: optional, the new and modified rules. See grammar.apply for the accepted format.
    • 425 |
    • captures: optional, the desired captures. See grammar.apply for the accepted format.
    426 |

    427 | Returns:

      428 |
    • the extended grammar.
429 | 430 |

431 |


check (input) 432 |
    Checks if input is valid Lua source code. 433 |

    434 | Parameters:

      435 |
    • input: a string containing Lua source code.
    436 |

    437 | Returns:

      438 |
    • true, if input is valid Lua source code, or false and an error message if the matching fails.
439 | 440 |

441 |


comment2text (comment) 442 |
    Strips all prefixing -- and enclosing --[=*[ from comment tokens. 443 |

    444 | Parameters:

      445 |
    • comment: the comment to strip.
    446 |

    447 | Returns:

      448 |
    • the text without comment marking syntax.
449 | 450 |

451 |


string2text (str) 452 |
    Strips all enclosing ', ", and [=*[ from string tokens, and processes escape characters. 453 |

    454 | Parameters:

      455 |
    • str: the string to strip.
    456 |

    457 | Returns:

      458 |
    • the text without string enclosers.
459 | 460 |

461 |


text2comment (text) 462 |
    Encloses the text with comment markers. 463 |

    464 | Parameters:

      465 |
    • text: the text to comment.
    466 |

    467 | Returns:

      468 |
    • the text with comment marking syntax.
469 | 470 |

471 |


text2string (text) 472 |
    Transforms a text into a syntactically valid Lua string. Similar to string.format with the '%q' option, but inserting escape numbers and escape codes where applicable. 473 |

    474 | Parameters:

      475 |
    • text: a string containing the text.
    476 |

    477 | Returns:

      478 |
    • a string, similar to string.format with option '%q'.
479 | 480 |
481 | 482 | 483 | 484 | 485 |
486 |
487 |
488 | 489 | 490 | -------------------------------------------------------------------------------- /examples/comment-extraction.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------------- 2 | -- 3 | -- The comment extraction example given in doc/grammar.html. For those of you 4 | -- who haven't read it, the code and comments are available below. 5 | -- 6 | -- Authors: Humberto Anjos and Francisco Sant'Anna 7 | -- 8 | -- $Id: comment-extraction.lua,v 1.3 2007/12/07 14:23:56 hanjos Exp $ 9 | -- 10 | ------------------------------------------------------------------------------- 11 | 12 | -- some imports to get things started. 13 | local lpeg = require 'lpeg' 14 | 15 | require "leg" -- check if available (and preload leg submodules) 16 | local parser = require 'leg.parser' 17 | local grammar = require 'leg.grammar' 18 | 19 | -- some aliasing 20 | local P, V = lpeg.P, lpeg.V 21 | 22 | -- argument capturing 23 | local args = { ... } 24 | 25 | -- 26 | -- Let's consider the problem of documenting a Lua module. In this case, comments 27 | -- must be captured before every function declaration when in the outermost scope. 28 | -- 29 | 30 | -- the code to parse 31 | local subject = args[1] or [=[ 32 | -- Calculates the sum a+b. 33 | -- An extra line. 34 | function sum (a, b) 35 | -- code 36 | end 37 | 38 | -- a variable assignment is not a "proper" declaration for an 39 | -- exported function 40 | f1 = function () 41 | -- code 42 | end 43 | 44 | while true do 45 | -- this function is not in the outermost scope 46 | function aux() end 47 | end 48 | 49 | function something:other(a, ...) 50 | -- a global function without comments 51 | 52 | return a, ... 
-- won't appear in the result 53 | end 54 | ]=] 55 | 56 | -- In the code above we want only to document sum and something:other, as 57 | -- f1 isn't properly (by our standards) declared and aux is not in the 58 | -- outermost scope (although it is still a global function). 59 | -- 60 | -- Let's define some patterns to simplify our job: 61 | 62 | -- spacing rule 63 | local S = parser.SPACE ^ 0 64 | 65 | -- matches any Lua statement, no captures 66 | Stat = P( parser.apply(V'Stat', nil) ) 67 | 68 | -- some interesting captures bundled up in a table. Note that the table keys 69 | -- match the grammar rules we want to add captures to. Any rules not in the 70 | -- `rules` table below will come from parser.rules . 71 | captures = { 72 | [1] = function (...) -- the initial rule 73 | return ''..table.concat{...}..'' 74 | end, 75 | 76 | GlobalFunction = function (name, parlist) -- global function declaration 77 | return ''..name..''..(parlist or '')..'' 78 | end, 79 | 80 | FuncName = grammar.C, -- capture the raw text 81 | ParList = grammar.C, -- capture the raw text 82 | COMMENT = parser.comment2text, -- extract comment trappings 83 | } 84 | 85 | -- the rules table 86 | rules = { 87 | [1] = ((V'COMMENT' *S) ^ 0) *S* V'GlobalFunction', -- new initial rule 88 | COMMENT = parser.COMMENT, -- just to add COMMENT's capture to the capture table 89 | } 90 | 91 | -- building the new grammar and adding the captures. This pattern will match 92 | -- any global function optionally preceded by comments, and return a string 93 | -- in the following format: 94 | -- 95 | -- commentsnameparameter list 96 | commentedFunc = P( grammar.apply(parser.rules, rules, captures) ) 97 | 98 | -- finally, this pattern matches all commented global functions, Stats 99 | -- or any other Lua tokens and packages the results in a table. This is done 100 | -- to capture only global function declarations in the global scope. 101 | patt = (commentedFunc + Stat + parser.ANY)^0 / function(...) 
102 | return table.concat({...}, '\n\n') -- some line breaks for easier reading 103 | end 104 | 105 | -- now match subject 106 | print('subject:', '\n'..subject) 107 | print('result:', '\n'..patt:match(subject)) 108 | -------------------------------------------------------------------------------- /examples/local-imports.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------------- 2 | -- 3 | -- A preprocessor that detects which basic modules and functions are being 4 | -- used, and adds local declarations at the top of the code. Somewhat useful 5 | -- (at least for me) along a module declaration. 6 | -- 7 | -- This is a very simplistic preprocessor: it simply searches for identifiers 8 | -- with the same name as a basic module or function. This might yield false 9 | -- positives, as a variable which happens to have the same name as a basic 10 | -- value will be incorrectly counted. Since the code generated here goes before 11 | -- the code in the input, the program still runs normally, so the false 12 | -- positives don't affect its semantics. 13 | -- 14 | -- A more complex analysis could be made, but it would be far easier to do it 15 | -- with an AST. 
16 | -- 17 | -- Author: Humberto Anjos 18 | -- 19 | -- $Id: local-imports.lua,v 1.2 2007/12/07 14:23:56 hanjos Exp $ 20 | -- 21 | ------------------------------------------------------------------------------- 22 | 23 | 24 | local lpeg = require 'lpeg' 25 | 26 | require "leg" -- check if available (and preload leg submodules) 27 | -- imported modules 28 | local parser = require 'leg.parser' 29 | 30 | 31 | -- imported functions 32 | local P = lpeg.P 33 | 34 | -- HELPER CODE ------------------------ 35 | 36 | -- tables using the names of Lua's basic modules and functions as keys, 37 | -- for easy searching 38 | local basicModules, basicFunctions = {}, {} 39 | 40 | -- populating the tables 41 | for k, v in pairs(_G) do 42 | if type(v) == 'table' then 43 | basicModules[k] = true 44 | elseif type(v) == 'function' then 45 | basicFunctions[k] = true 46 | end 47 | end 48 | 49 | -- to pretty print the statements 50 | local function maxNameLength(list) 51 | local max = 0 52 | 53 | for _, v in ipairs(list) do 54 | if max < #v then max = #v end 55 | end 56 | 57 | return max 58 | end 59 | 60 | local function buildStatements(list, type) 61 | local str, max = '-- basic '..type..'\n', maxNameLength(list) 62 | 63 | table.sort(list) 64 | for _, v in ipairs(list) do 65 | str = str..'local '..v..string.rep(' ', max - #v)..' = '..v..'\n' 66 | end 67 | 68 | return str 69 | end 70 | 71 | local modules, functions = {}, {} 72 | local ID = parser.IDENTIFIER / function (id) 73 | if basicModules[id] and not modules[id] then 74 | modules[#modules + 1] = id 75 | modules[id] = #modules 76 | elseif basicFunctions[id] and not functions[id] then 77 | functions[#functions + 1] = id 78 | functions[id] = #functions 79 | end 80 | end 81 | 82 | local ALL = P( (ID + 1)^0 ) 83 | 84 | -- TESTING ---------------------------- 85 | local args, input = { ... 
}, nil 86 | 87 | if args[1] then 88 | input = io.open(args[1], 'r') 89 | 90 | if input then -- it's a file 91 | input = input:read'*a' 92 | else -- it's a string with code 93 | input = args[1] 94 | end 95 | end 96 | 97 | subject = input or [=[ 98 | local a = _VERSION or math.pi 99 | local table = {} -- false positive 100 | 101 | for i, v in ipairs(table.sort(_G[t])) do 102 | if type(v) == 'function' then 103 | print(i, v) 104 | end 105 | end 106 | ]=] 107 | 108 | ALL:match(subject) 109 | 110 | result = buildStatements(modules, 'modules')..'\n' 111 | ..buildStatements(functions, 'functions')..'\n-- code\n' 112 | ..subject 113 | 114 | --print('subject:', '\n'..subject) 115 | --print('\nresult:', '\n'..result) 116 | 117 | print(result) 118 | -------------------------------------------------------------------------------- /examples/metalua-ast.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------------- 2 | -- 3 | -- An AST builder for Leg. This AST is from Metalua 4 | -- (http://metalua.luaforge.net). 
5 | -- 6 | -- Author: Humberto Anjos (the code below) and Fabien Fleutot (the AST design) 7 | -- 8 | -- $Id: metalua-ast.lua,v 1.3 2007/12/07 14:23:56 hanjos Exp $ 9 | -- 10 | ------------------------------------------------------------------------------- 11 | 12 | -- basic modules 13 | local _G = _G 14 | local math = math 15 | local string = string 16 | local table = table 17 | 18 | -- basic functions 19 | local error = error 20 | local ipairs = ipairs 21 | local pairs = pairs 22 | local print = print 23 | local require = require 24 | local select = select 25 | local tonumber = tonumber 26 | local type = type 27 | local unpack = unpack 28 | 29 | require "leg" -- check if available (and preload leg submodules) 30 | -- imported modules 31 | local parser = require 'leg.parser' 32 | local grammar = require 'leg.grammar' 33 | 34 | local lpeg = require 'lpeg' 35 | 36 | -- imported functions 37 | local P = lpeg.P 38 | 39 | -- AST BUILDING FUNCTIONS ------------- 40 | 41 | -- the table holding the node builders 42 | builder = {} 43 | 44 | -- last stats 45 | function builder.Break() return { tag = 'Break' } end 46 | function builder.Return(...) return { tag = 'Return', ... } end 47 | 48 | -- statements 49 | function builder.Do(block) return { tag = 'Do', block } end 50 | function builder.Let(lhslist, exprlist) 51 | return { tag = 'Let', lhslist, exprlist } 52 | end 53 | 54 | function builder.While(expr, block) return { tag = 'While', expr, block } end 55 | function builder.Repeat(block, expr) return { tag = 'Repeat', block, expr } end 56 | function builder.If(...) return { tag = 'If', ... 
} end 57 | 58 | function builder.Fornum(var, index, limit, step, block) 59 | return { tag = 'Fornum', var, index, limit, step or Number(1), block } 60 | end 61 | 62 | function builder.Forin(varlist, exprlist, block) 63 | return { tag = 'Forin', varlist, exprlist, block } 64 | end 65 | 66 | function builder.Local(varlist, exprlist) 67 | return { tag = 'Local', varlist, exprlist } 68 | end 69 | 70 | function builder.Localrec(varlist, exprlist) 71 | return { tag = 'Localrec', varlist, exprlist } 72 | end 73 | 74 | function builder.Call(func, ...) return { tag = 'Call', func, ... } end 75 | function builder.Method(table, string, ...) 76 | return { tag = 'Method', table, string, ... } 77 | end 78 | 79 | -- expressions 80 | function builder.Nil() return { tag = 'Nil' } end 81 | function builder.Dots() return { tag = 'Dots' } end 82 | function builder.True() return { tag = 'True' } end 83 | function builder.False() return { tag = 'False' } end 84 | function builder.Number(number) return { tag = 'Number', number } end 85 | function builder.String(string) return { tag = 'String', string } end 86 | 87 | function builder.Function(parlist, block) 88 | return { tag = 'Function', parlist, block } 89 | end 90 | 91 | function builder.Table(...) return { tag = 'Table', ... 
} end 92 | function builder.Key(key, value) return { tag = 'Key', key, value } end 93 | 94 | function builder.Op(op, value1, value2) 95 | return { tag = 'Op', op, value1, value2 } 96 | end 97 | 98 | -- a parenthesized expression 99 | function builder.One(expr) return { tag = 'One', expr } end 100 | 101 | -- variables 102 | function builder.Id(identifier) return { tag = 'Id', identifier } end 103 | function builder.Index(table, index) return { tag = 'Index', table, index } end 104 | 105 | -- operators 106 | function builder.Add() return { tag = 'Add' } end 107 | function builder.Sub() return { tag = 'Sub' } end 108 | function builder.Mul() return { tag = 'Mul' } end 109 | function builder.Div() return { tag = 'Div' } end 110 | function builder.Mod() return { tag = 'Mod' } end 111 | function builder.Pow() return { tag = 'Pow' } end 112 | function builder.Concat() return { tag = 'Concat' } end 113 | function builder.Eq() return { tag = 'Eq' } end 114 | function builder.Ne() return { tag = 'Ne' } end 115 | function builder.Gt() return { tag = 'Gt' } end 116 | function builder.Ge() return { tag = 'Ge' } end 117 | function builder.Lt() return { tag = 'Lt' } end 118 | function builder.Le() return { tag = 'Le' } end 119 | function builder.And() return { tag = 'And' } end 120 | function builder.Or() return { tag = 'Or' } end 121 | function builder.Not() return { tag = 'Not' } end 122 | function builder.Len() return { tag = 'Len' } end 123 | 124 | -- technically, the operator Sub is also used for the unary operator -, 125 | -- but to avoid ambiguities during the construction of the expression tree, 126 | -- I preferred to build an Unm node and change it to a Sub when the node 127 | -- is safely identified as an unary -. 128 | function builder.Unm() return { tag = 'Unm' } end 129 | 130 | -- OPERATOR PROCESSING CODE ----------- 131 | 132 | -- OBS.: 133 | -- Leg's grammar does not specify operator precedence, so it must be treated 134 | -- outside the grammar. 
This really sucks, and it's on the list of things to 135 | -- improve in future versions. 136 | 137 | -- Operator precedence table. Maps operator tags to a table holding the 138 | -- respective precedence, left or right associativity, and arity (unary 139 | -- or binary) 140 | local ops = { 141 | Or = { precedence = 1, left = true, arity = 2 }, 142 | And = { precedence = 2, left = true, arity = 2 }, 143 | Eq = { precedence = 3, left = true, arity = 2 }, 144 | Ne = { precedence = 3, left = true, arity = 2 }, 145 | Le = { precedence = 3, left = true, arity = 2 }, 146 | Ge = { precedence = 3, left = true, arity = 2 }, 147 | Lt = { precedence = 3, left = true, arity = 2 }, 148 | Gt = { precedence = 3, left = true, arity = 2 }, 149 | Concat = { precedence = 4, right = true, arity = 2 }, 150 | Add = { precedence = 5, left = true, arity = 2 }, 151 | Sub = { precedence = 5, left = true, arity = 2 }, 152 | Mul = { precedence = 6, left = true, arity = 2 }, 153 | Div = { precedence = 6, left = true, arity = 2 }, 154 | Mod = { precedence = 6, left = true, arity = 2 }, 155 | Not = { precedence = 7, arity = 1 }, 156 | Len = { precedence = 7, arity = 1 }, 157 | Unm = { precedence = 7, arity = 1 }, 158 | Pow = { precedence = 8, right = true, arity = 2 } 159 | } 160 | 161 | -- some self-explaining helper functions 162 | local function isOperator(node) 163 | return node and ops[node.tag] 164 | end 165 | 166 | local function isUnary(node) 167 | return isOperator(node) and ops[node.tag].arity == 1 168 | end 169 | 170 | local function isBinary(node) 171 | return isOperator(node) and ops[node.tag].arity == 2 172 | end 173 | 174 | 175 | -- Takes a list of tokens with Lua values and operators and returns it in 176 | -- Reverse Polish Notation, using Dijkstra's shunting yard algorithm. 
The 177 | -- actual expression tree will be built in Exp's capture function 178 | local function toRPN(list) 179 | local queue = {} 180 | local stack = {} 181 | 182 | for _, v in ipairs(list) do 183 | if isBinary(v) or isUnary(v) then 184 | local vPrec, topPrec 185 | 186 | if stack[#stack] then 187 | vPrec, topPrec = ops[v.tag].precedence, 188 | ops[stack[#stack][1].tag].precedence 189 | end 190 | 191 | while stack[#stack] and ((ops[v.tag].right and vPrec < topPrec) 192 | or (ops[v.tag].left and vPrec <= topPrec)) do 193 | 194 | queue[#queue + 1] = table.remove(stack) 195 | end 196 | 197 | stack[#stack + 1] = builder.Op(v) 198 | else 199 | queue[#queue + 1] = v 200 | end 201 | end 202 | 203 | -- dumping the stack 204 | for i = #stack, 1, -1 do 205 | queue[#queue + 1] = stack[i] 206 | end 207 | 208 | return queue 209 | end 210 | 211 | -- a temporary node 212 | local function MethodDecl(Index, Method) 213 | return { tag = 'MethodDecl', Index, Method } 214 | end 215 | 216 | -- a temporary node 217 | local hole = { tag = 'Hole' } 218 | 219 | -- a table mapping an operator to its builder function 220 | local opToBuilder = { 221 | ['or'] = builder.Or, 222 | ['and'] = builder.And, 223 | ['=='] = builder.Eq, 224 | ['~='] = builder.Ne, 225 | ['<='] = builder.Le, 226 | ['>='] = builder.Ge, 227 | ['<'] = builder.Lt, 228 | ['>'] = builder.Gt, 229 | ['..'] = builder.Concat, 230 | ['+'] = builder.Add, 231 | ['-'] = builder.Sub, 232 | ['*'] = builder.Mul, 233 | ['/'] = builder.Div, 234 | ['%'] = builder.Mod, 235 | ['not'] = builder.Not, 236 | ['#'] = builder.Len, 237 | ['unm'] = builder.Unm, 238 | ['^'] = builder.Pow, 239 | } 240 | 241 | -- CAPTURE TABLE ---------------------- 242 | 243 | -- the capture table. This table will be piped to parser.rules to build the 244 | -- AST. 245 | captures = { 246 | -- Statements 247 | Block = function (...) 248 | local Block = { ... } 249 | 250 | -- if the captured block has no statements, Block will contain { '' }. 
251 | -- Detect that and return an empty table in that case 252 | if #Block == 1 and Block[1] == '' then 253 | return {} 254 | end 255 | 256 | return Block 257 | end, 258 | 259 | Assign = builder.Let, 260 | Do = builder.Do, 261 | While = builder.While, 262 | Repeat = builder.Repeat, 263 | If = builder.If, 264 | NumericFor = builder.Fornum, 265 | GenericFor = builder.Forin, 266 | 267 | GlobalFunction = function (FuncName, FuncBody) 268 | if FuncName.tag == 'MethodDecl' then -- it's a method declaration 269 | -- a method declaration like 'function b:o() <...> end' is equivalent to 270 | -- 'b.o = function (self) <...> end' 271 | 272 | FuncName.tag = 'Index' -- FuncName should be an Index node then 273 | 274 | local parlist = FuncBody[1] 275 | table.insert(parlist, 1, builder.Id 'self') 276 | end 277 | 278 | return builder.Let( { FuncName }, { FuncBody } ) 279 | end, 280 | 281 | LocalFunction = function (Name, FuncBody) 282 | return builder.Localrec( { Name }, { FuncBody }) 283 | end, 284 | 285 | LocalAssign = function (NameList, ExpList) 286 | return builder.Local(NameList, ExpList or {}) 287 | end, 288 | 289 | LastStat = function (STAT) 290 | if STAT == 'break' then 291 | return builder.Break() 292 | else 293 | if STAT == 'return' then 294 | STAT = {} 295 | end 296 | 297 | return builder.Return(unpack(STAT)) 298 | end 299 | end, 300 | 301 | -- Expressions 302 | 303 | -- Takes a list of tokens and operators and builds the appropriate tree node 304 | Exp = function (...) 305 | local list = { ... 
} 306 | 307 | if #list == 1 then 308 | return list[1] 309 | end 310 | 311 | local listRPN = toRPN(list) -- putting the list in RPN 312 | local stack = {} 313 | 314 | for _, v in ipairs(listRPN) do 315 | if v.tag == 'Op' and isUnary(v[1]) and not v[2] then 316 | if v[1].tag == 'Unm' then -- replacing Unm with Sub 317 | v[1].tag = 'Sub' 318 | end 319 | 320 | v[2] = table.remove(stack) 321 | elseif v.tag == 'Op' and isBinary(v[1]) and not v[2] and not v[3] then 322 | v[3] = table.remove(stack) 323 | v[2] = table.remove(stack) 324 | end 325 | 326 | stack[#stack + 1] = v 327 | end 328 | 329 | return stack[1] 330 | end, 331 | 332 | _PrefixExp = function (base, ...) 333 | for _, suffix in ipairs { ... } do 334 | -- filling the holes 335 | suffix[1] = base 336 | base = suffix 337 | end 338 | 339 | return base 340 | end, 341 | 342 | _PrefixExpParens = function (Exp) 343 | return builder.One(Exp) 344 | end, 345 | 346 | _PrefixExpDot = function (ID) 347 | -- the hole will be filled in _PrefixExp 348 | return builder.Index(hole, builder.String(ID)) 349 | end, 350 | 351 | _PrefixExpSquare = function (Exp) 352 | -- the hole will be filled in _PrefixExp 353 | return builder.Index(hole, Exp) 354 | end, 355 | 356 | _PrefixExpColon = function (ID, _PrefixExpArgs) 357 | -- the hole will be filled in _PrefixExp 358 | return builder.Method(hole, builder.String(ID), 359 | select(2, unpack(_PrefixExpArgs))) 360 | end, 361 | 362 | _PrefixExpArgs = function (Args) 363 | -- the hole will be filled in _PrefixExp 364 | return builder.Call(hole, unpack(Args)) 365 | end, 366 | 367 | -- Functions and closures 368 | FuncName = function (Name, ...) 
369 | local base = Name 370 | 371 | for _, v in ipairs {...} do 372 | if type(v) == 'string' then -- it's a method 373 | -- using MethodDecl; this will be transformed into an Index node later 374 | base = MethodDecl(base, builder.String(v)) 375 | 376 | elseif v.tag == 'Index' then 377 | v[1] = base 378 | 379 | base = v 380 | end 381 | end 382 | 383 | return base 384 | end, 385 | 386 | FuncBody = function (ParList, Block) 387 | return builder.Function(ParList or {}, Block) 388 | end, 389 | 390 | Args = function (arg) 391 | if (not arg) or arg.tag then -- there's either one or no arguments 392 | arg = { arg } 393 | end 394 | 395 | return arg 396 | end, 397 | 398 | -- Lists 399 | VarList = grammar.Ct, 400 | NameList = grammar.Ct, 401 | ExpList = grammar.Ct, 402 | 403 | ParList = function (NameList, varargs) 404 | if NameList.tag == 'Dots' then -- the parameter list is just ... 405 | return { NameList } 406 | end 407 | 408 | NameList[#NameList + 1] = varargs 409 | return NameList 410 | end, 411 | 412 | -- Table constructors 413 | TableConstructor = function (FieldList) 414 | FieldList = FieldList or {} 415 | 416 | return builder.Table(unpack(FieldList)) 417 | end, 418 | 419 | -- fields 420 | FieldList = grammar.Ct, 421 | _FieldSquare = builder.Key, 422 | _FieldExp = grammar.C, 423 | 424 | _FieldID = function (ID, Exp) 425 | return builder.Key(builder.String(ID), Exp) 426 | end, 427 | 428 | -- Operators 429 | BinOp = function (op) return opToBuilder[op]() end, 430 | UnOp = function (op) 431 | if op == '-' then 432 | return opToBuilder['unm']() 433 | else 434 | return opToBuilder[op]() 435 | end 436 | end, 437 | 438 | -- Simple expressions 439 | NIL = builder.Nil, 440 | TRUE = builder.True, 441 | FALSE = builder.False, 442 | NUMBER = function (num) return builder.Number(tonumber(num)) end, 443 | STRING = function (str) return builder.String(parser.string2text(str)) end, 444 | ID = grammar.C, 445 | Name = builder.Id, 446 | ['...'] = builder.Dots, 447 | 448 | -- Helper 
patterns 449 | EPSILON = function () return nil end, 450 | } 451 | 452 | -- the matching pattern 453 | local patt = P( parser.apply(nil, captures) ) 454 | 455 | -- Takes a string and checks if it's syntactically valid Lua 5.1 code. If it 456 | -- is, the corresponding AST is built and returned; if not, an error is thrown. 457 | function build(input) 458 | local result, msg = parser.check(input) 459 | 460 | if result then return patt:match(input) 461 | else error(msg) end 462 | end 463 | 464 | -- shamelessly stolen from Metalua: this is its table.tostring function, slightly 465 | -- adapted to substitute its dependencies for my own code. 466 | local function ast2string(t, ...) 467 | local LINE_MAX, PRINT_HASH = math.huge, true 468 | for _, x in ipairs {...} do 469 | if type(x) == "number" then LINE_MAX = x 470 | elseif x=="nohash" then PRINT_HASH = false 471 | end 472 | end 473 | 474 | local current_offset = 0 -- indentation level 475 | local xlen_cache = { } -- cached results for xlen() 476 | local acc_list = { } -- Generated bits of string 477 | local function acc(...) -- Accumulate a bit of string 478 | local x = table.concat{...} 479 | current_offset = current_offset + #x 480 | table.insert(acc_list, x) 481 | end 482 | local function valid_id(x) 483 | -- FIXME: we should also reject keywords. 484 | return type(x) == "string" and parser.IDENTIFIER:match(x) 485 | end 486 | local function shallowcopy(t) 487 | local newt = {} 488 | 489 | for k, v in pairs(t) do 490 | newt[k] = v 491 | end 492 | 493 | return newt 494 | end 495 | 496 | -- Compute the number of chars it would require to display the table 497 | -- as a single line. Helps to decide whether some carriage returns are 498 | -- required. Since the size of each sub-table is required many times, 499 | -- it's cached in [xlen_cache]. 
500 | local xlen_type = { } 501 | local function xlen(x, tracker) 502 | tracker = tracker or { } 503 | if x==nil then return #"nil" end 504 | if tracker[x] then return #_G.tostring(x) end 505 | local len = xlen_cache[x] 506 | if len then return len end 507 | local f = xlen_type[type(x)] 508 | if not f then return #_G.tostring(x) end 509 | len = f (x, tracker) 510 | xlen_cache[x] = len 511 | return len 512 | end 513 | 514 | -- optim: no need to compute lengths if I'm not going to use them 515 | -- anyway. 516 | if LINE_MAX == math.huge then xlen = function() return 0 end end 517 | 518 | xlen_type["nil"] = function() return 3 end 519 | function xlen_type.number(x) return #_G.tostring(x) end 520 | function xlen_type.boolean(x) return x and 4 or 5 end 521 | function xlen_type.string(x) return #string.format("%q",x) end 522 | function xlen_type.table (adt, tracker) 523 | 524 | -- Circular references detection 525 | tracker = shallowcopy(tracker) 526 | tracker [adt] = true 527 | 528 | local has_tag = valid_id(adt.tag) 529 | local alen = #adt 530 | local has_arr = alen>0 531 | local has_hash = false 532 | local x = 0 533 | 534 | if PRINT_HASH then 535 | -- first pass: count hash-part 536 | for k, v in pairs(adt) do 537 | if k=="tag" and has_tag then 538 | -- this is the tag -> do nothing! 539 | elseif type(k)=="number" and k<=alen and math.fmod(k,1)==0 then 540 | -- array-part pair -> do nothing! 
541 | else 542 | has_hash = true 543 | if valid_id(k) then x=x+#k 544 | else x = x + xlen (k, tracker) + 2 end -- count surrounding brackets 545 | x = x + xlen (v, tracker) + 5 -- count " = " and ", " 546 | end 547 | end 548 | end 549 | 550 | for i = 1, alen do x = x + xlen (adt[i], tracker) + 2 end -- count ", " 551 | 552 | if not (has_tag or has_arr or has_hash) then return 3 end 553 | if has_tag then x=x+#adt.tag+1 end 554 | if not (has_arr or has_hash) then return x end 555 | if not has_hash and alen==1 and type(adt[1])~="table" then 556 | return x-2 -- subtract extraneous ", " 557 | end 558 | return x+2 -- count "{ " and " }", subtract extraneous ", " 559 | end 560 | 561 | -- Recursively print a (sub) table at given indentation level. 562 | -- [newline] indicates whether newlines should be inserted. 563 | local function rec (adt, indent, tracker) 564 | local function acc_newline() 565 | acc ("\n"); acc (string.rep (" ", indent)) 566 | current_offset = indent 567 | end 568 | local x = { } 569 | x["nil"] = function() acc "nil" end 570 | function x.number() acc (_G.tostring (adt)) end 571 | function x.string() acc (string.format ("%q", adt)) end 572 | function x.boolean() acc (adt and "true" or "false") end 573 | function x.table() 574 | tracker[adt] = true 575 | local has_tag = valid_id(adt.tag) 576 | local alen = #adt 577 | local has_arr = alen>0 578 | local has_hash = false 579 | local new_indent 580 | if has_tag then acc("`"); acc(adt.tag) end 581 | 582 | -- First pass: handle hash-part 583 | if PRINT_HASH then 584 | for k, v in pairs(adt) do 585 | if k=="tag" and has_tag then -- this is the tag -> do nothing! 586 | elseif type(k)=="number" and k<=alen and math.fmod(k,1)==0 then 587 | -- nothing: this is an array-part pair, parsed later 588 | else -- hash-part pair 589 | 590 | -- Is it the first time we parse a hash pair? 
591 | if not has_hash then acc "{ "; indent = current_offset 592 | else acc ", " end 593 | 594 | -- Determine whether a newline is required 595 | local is_id, expected_len = valid_id(k) 596 | if is_id then expected_len = #k + xlen (v, tracker) + #" = , " 597 | else expected_len = xlen (k, tracker) + 598 | xlen (v, tracker) + #"[] = , " end 599 | if has_hash and expected_len + current_offset > LINE_MAX 600 | then acc_newline() end 601 | 602 | -- Print the key 603 | if is_id then acc(k); acc " = " 604 | else acc "["; rec (k, current_offset, tracker); acc "] = " end 605 | 606 | -- Print the value 607 | rec (v, current_offset, tracker) 608 | has_hash = true 609 | end 610 | end 611 | end 612 | 613 | -- now we know whether there's a hash-part, an array-part, and a tag. 614 | -- Tag and hash-part are already printed if they're present. 615 | if not has_tag and not has_hash and not has_arr then acc "{ }"; return 616 | elseif has_tag and not has_hash and not has_arr then return -- nothing! 617 | else -- has_hash or has_arr 618 | if has_hash and has_arr then acc ", " 619 | elseif has_tag and not has_hash and alen==1 and type(adt[1])~="table" then 620 | -- No brace required; don't print "{" and return before printing "}" 621 | acc (" "); rec (adt[1], new_indent, tracker); return 622 | elseif not has_hash then 623 | -- Braces required, but not opened by hash-part handler yet 624 | acc "{ "; indent = current_offset 625 | end 626 | 627 | -- 2nd pass: array-part 628 | if has_arr then 629 | rec (adt[1], new_indent, tracker) 630 | for i=2, alen do 631 | acc ", "; 632 | if current_offset + xlen (adt[i], { }) > LINE_MAX 633 | then acc_newline() end 634 | rec (adt[i], new_indent, tracker) 635 | end 636 | end 637 | acc " }" 638 | end 639 | end 640 | local y = x[type(adt)] 641 | if y then y() else acc(_G.tostring(adt)) end 642 | end 643 | rec(t, 0, { }) 644 | return table.concat (acc_list) 645 | end 646 | 647 | -- TESTING ---------------------------- 648 | local args = { ... 
} 649 | 650 | subject = args[1] or [=[ 651 | -- this comment won't be captured 652 | local a = 3 + -math.pi 653 | 654 | function b(ascii, ...) 655 | local t = { a = 1, ['b'] = {}, -3, .234 } 656 | 657 | while _VERSION > 5.1 do 658 | if x['a'] then 659 | x = false 660 | return x, 1, 2, 3, -4 661 | else 662 | break 663 | end 664 | end 665 | 666 | end 667 | 668 | local function ascii2() return [[ascii2!]], whatever end 669 | ]=] 670 | 671 | print('subject:', '\n'..subject) 672 | print('result:', '\n'..ast2string(build(subject), 80, 'nohash')) 673 | -------------------------------------------------------------------------------- /examples/switch-macro.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------------- 2 | -- 3 | -- A preprocessor which transforms a switch construct (called 'match' here) 4 | -- into a sequence of if-elseif blocks. There was quite a discussion going on 5 | -- (again...) about adding switch statements to Lua 6 | -- (http://lua-users.org/lists/lua-l/2007-11/msg00099.html), so I decided to 7 | -- use it as a demonstration of Leg's capabilities. 8 | -- 9 | -- Author: Humberto Anjos 10 | -- 11 | -- $Id: switch-macro.lua,v 1.3 2007/12/07 14:23:56 hanjos Exp $ 12 | -- 13 | ------------------------------------------------------------------------------- 14 | 15 | -- imports 16 | local lpeg = require 'lpeg' 17 | 18 | require "leg" -- check if available (and preload leg submodules) 19 | local parser = require 'leg.parser' 20 | 21 | -- aliasing 22 | local P, V, C, Ct, Cs = lpeg.P, lpeg.V, lpeg.C, lpeg.Ct, lpeg.Cs 23 | 24 | -- command-line arguments 25 | local args = { ... } 26 | 27 | -- the code to parse 28 | subject = args[1] or [=[ 29 | match assert(io.open(file, '*a')) 30 | -- no cases here, nothing is generated 31 | end 32 | 33 | local args = { ... 
} 34 | 35 | -- before match 36 | match #args 37 | -- before the first case 38 | when 0 do print 'No arguments!' 39 | -- before the second case 40 | when 1 do -- after case checking 41 | print('only one argument: ', args[1]) 42 | when 2 do 43 | print('two arguments: ', args[1], args[2]) 44 | 45 | local a = tonumber(args[1]) + tonumber(args[2]) 46 | io.write(a) io.flush() 47 | else -- before else block 48 | for _, v in ipairs(args) do 49 | print(v) 50 | end 51 | -- after else 52 | end 53 | -- after match 54 | 55 | function eval(node, ...) 56 | match node.tag 57 | when 'Const' do 58 | return node.value 59 | when (node.left and node.right) and 'BinOp' do 60 | -- this comment won't appear in the generated code 61 | local op, left, right = node.op, node.left, node.right 62 | -- but this one will 63 | return op(left, right) 64 | -- and this one too 65 | when node.operand and 'UnOp' do 66 | local op, operand = node.op, node.operand 67 | 68 | return op(operand) 69 | else 70 | match isList(node) -- internal match statement 71 | when true do visit(node) 72 | when false do error 'Invalid node!' 73 | end 74 | end 75 | end 76 | ]=] 77 | 78 | -- After reading several proposals on the Lua discussion list, I decided to 79 | -- implement this one: 80 | -- 81 | -- match 82 | -- when do 83 | -- when do 84 | -- ... 85 | -- when do 86 | -- else 87 | -- end 88 | -- 89 | -- where , , ... are Lua expressions. 90 | -- 91 | -- The construct above will be converted into the following code: 92 | -- 93 | -- do 94 | -- local __temp__ = 95 | -- if __temp__ == () then 96 | -- elseif __temp__ == () then 97 | -- ... 98 | -- elseif __temp__ == () then 99 | -- else end 100 | -- end 101 | -- 102 | -- Implementation notes: 103 | -- 104 | -- * Technically, the local variable __temp__ should receive a name provably 105 | -- unique in the program. But, for this example, naming it __temp__ and 106 | -- restricting its scope will do the trick. 
107 | -- 108 | -- * The default case, if present, must be the last clause. 109 | -- 110 | -- * If there's only the default case, the local declaration and the default 111 | -- case's block will be generated, without an enclosing if statement. 112 | -- 113 | -- * If there are no cases and no default case, only the local declaration will 114 | -- be generated. 115 | -- 116 | -- * The local declaration will always be generated, because the expression 117 | -- in the local declaration might have side effects, which affect program 118 | -- semantics even if no match is made. 119 | -- 120 | -- * Some comments are captured, some are not. The comments captured are those 121 | -- which are in the middle or at the end of a statement, and are 122 | -- captured along with the block. The other ones are matched as part of the 123 | -- spacing, and consequently not captured. 124 | -- 125 | -- * This is an obvious one, but: since the result is a series of if-elseif 126 | -- blocks, there is no fallthrough. 127 | -- 128 | -- * A reasonable improvement would be allowing a case clause to have several 129 | -- possible matches, generating something like 130 | -- if __temp__ == () or __temp__ == () then ... 131 | -- 132 | -- This is left as an exercise to the reader *shameless cop-out*. 133 | 134 | -- spacing 135 | local S = V'IGNORED' -- parser.rules.IGNORED could be used 136 | 137 | -- epsilon rule 138 | local EPSILON = V'EPSILON' / function () end 139 | 140 | -- new matching rule. Notice that the Block rule has no captures. 141 | local Match = (P'match' *S* C(V'Exp') *S* 142 | Ct((P'when' *S* C(V'Exp') *S* P'do' *S* V'Block')^0) *S* 143 | ((P'else' *S* V'Block') + EPSILON) *S* P'end') 144 | / function (exp, cases, default) 145 | if #cases == 0 then -- no case clauses 146 | if default then -- return the local declaration and the block 147 | return 'do local __temp__ = ('..exp..') '..default..' 
end' 148 | else -- generate just the local declaration 149 | return 'do local __temp__ = ('..exp..') end' 150 | end 151 | else -- there's at least one clause 152 | local str = 'do local __temp__ = ('..exp..') ' 153 | 154 | -- generating a new if or elseif block 155 | for i = 1, #cases - 3, 2 do 156 | str = str..'if __temp__ == ('..cases[i]..') then ' 157 | ..cases[i + 1]..' else' 158 | end 159 | 160 | -- the last case clause 161 | str = str..'if __temp__ == ('..cases[#cases - 1]..') then ' 162 | ..cases[#cases] 163 | 164 | if default then -- generate the else block 165 | str = str..' else '..default..' end' 166 | else -- no else, just finish it 167 | str = str..' end' -- end if-elseif chain 168 | end 169 | 170 | return str..' end' -- end do 171 | end 172 | end 173 | 174 | -- creating the LPeg pattern 175 | local oldStat, oldBlock = parser.rules.Stat, parser.rules.Block 176 | 177 | local MATCH = P( parser.apply { 178 | -- adding Match to the list of valid Statements 179 | Stat = oldStat + Match, 180 | 181 | -- the Block rule needs to be updated as well, in order to make the 182 | -- necessary substitutions to inner Match statements 183 | Block = Cs(oldBlock) 184 | } ) 185 | 186 | print('subject:', '\n'..subject) 187 | print('result:', '\n'..MATCH:match(subject)) 188 | -------------------------------------------------------------------------------- /examples/syntax-highlighting.lua: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keplerproject/leg/dcd25ff58eab815cb158afcf4cc541281bc36408/examples/syntax-highlighting.lua -------------------------------------------------------------------------------- /examples/vararg-indexing.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------------- 2 | -- 3 | -- A preprocessor which uses Leg to transform ...[] into 4 | -- select(, ...). 
5 | -- 6 | -- Author: Humberto Anjos 7 | -- 8 | -- $Id: vararg-indexing.lua,v 1.3 2007/12/07 14:23:56 hanjos Exp $ 9 | -- 10 | ------------------------------------------------------------------------------- 11 | 12 | -- ye olde imports 13 | local lpeg = require 'lpeg' 14 | 15 | require "leg" -- check if available (and preload leg submodules) 16 | local parser = require 'leg.parser' 17 | 18 | -- some aliasing to save my poor fingertips 19 | local V, P, Cs = lpeg.V, lpeg.P, lpeg.Cs 20 | 21 | -- argument processing 22 | local args = { ... } 23 | 24 | -- the code to parse 25 | subject = args[1] or [=[ 26 | local arg1, arg2, arg3 = ...[1], ... [ -2 +x[[whatever, man]]^t[5] ], ... 27 | -- Oh my G0dZ, a comment in the middle !!1!one!1! This will disappear 28 | [-(-3)] 29 | 30 | if do_or_die() then -- inside a block 31 | return ...[BOOM_baby(...[2], 'boink!')] -- inside an expression 32 | end 33 | 34 | -- ...['inside a comment'] 35 | 36 | a = " ...['inside a string!'] " 37 | ]=] 38 | 39 | -- spacing rule 40 | local S = parser.rules.IGNORED -- V'IGNORED' could be used 41 | 42 | -- a pattern which matches any instance of ...[] and returns 43 | -- 'select(, ...)'. You need parser.apply because the definition of Exp is 44 | -- recursive, and needs roughly half of Lua's grammar to work. One could try to 45 | -- work out which rules are actually needed, but just using the whole damn 46 | -- thing is so much easier... 47 | local oldExp = parser.rules.Exp 48 | 49 | local VARARG = P( parser.apply ( 50 | { -- the rule table 51 | 52 | -- matching ...[]. We'll use lpeg.Cs for the substitution. 53 | VarargIndex = V'...' *S* V'[' *S* V'Exp' *S* V']', 54 | 55 | -- VarargIndex is now a valid subexpression. Using lpeg.Cs ensures that 56 | -- inner VarargIndexes will be substituted as well. VarargIndex must be 57 | -- matched before oldExp or the ... will be understood as a normal 58 | -- ellipsis. 
59 | Exp = Cs(V'VarargIndex' + oldExp), 60 | }, 61 | { -- the capture table 62 | VarargIndex = function (exp) 63 | return 'select('..exp..', ...)' 64 | end 65 | }) ) 66 | 67 | -- a pattern which does the substitution with Cs 68 | local ALL = Cs(VARARG) 69 | 70 | -- printing the results 71 | print('subject:', '\n'..subject) 72 | print('result:', '\n'..ALL:match(subject)) 73 | -------------------------------------------------------------------------------- /release: -------------------------------------------------------------------------------- 1 | $Id: release,v 1.2 2007/11/26 18:41:51 hanjos Exp $ 2 | 3 | Version: 0.1.2 4 | ============== 5 | 6 | API 7 | === 8 | 9 | * grammar.lua received two new utility functions: anyOf and listOf 10 | 11 | Bugs Fixed 12 | ========== 13 | 14 | * adding scanner's keywords and symbols to parser's rules ended up altering 15 | scanner.keywords and scanner.symbols as well; using grammar.complete instead 16 | of grammar.apply does the trick. 17 | 18 | * grammar.apply was inadvertently piping the captures to grammar instead of 19 | rules if rules was nil. 20 | 21 | Documentation 22 | ============= 23 | 24 | * changes in the documentation generator made the new docs slightly different; 25 | no big conceptual change. 26 | 27 | * an improved README file 28 | 29 | * a COPYRIGHT file with Leg's license 30 | 31 | * and this release file -------------------------------------------------------------------------------- /rockspec/leg-0.1.2-1.rockspec: -------------------------------------------------------------------------------- 1 | -- Package metadata 2 | package = 'Leg' 3 | version = '0.1.2-1' 4 | description = { 5 | summary = 'A Lua 5.1 grammar', 6 | detailed = [[ 7 | Leg offers a complete Lua 5.1 grammar, 8 | along with a small API for user manipulation. 
9 | ]], 10 | license = 'MIT/X11', 11 | homepage = 'http://leg.luaforge.net/', 12 | } 13 | 14 | -- Dependency information 15 | dependencies = { 16 | 'lpeg >= 0.6', 17 | 'lua >= 5.1', 18 | } 19 | 20 | -- Build rules 21 | source = { 22 | url = 'http://luaforge.net/frs/download.php/2728/leg-0.1.2.tar.gz', 23 | dir = 'leg', 24 | } 25 | 26 | build = { 27 | type = 'make', 28 | install_variables = { 29 | LUA_LIB = "$(LUADIR)", 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /rockspec/leg-scm-1.rockspec: -------------------------------------------------------------------------------- 1 | -- Package metadata 2 | package = 'Leg' 3 | version = 'scm-1' 4 | description = { 5 | summary = 'A Lua 5.1 grammar', 6 | detailed = [[ 7 | Leg offers a complete Lua 5.1 grammar, 8 | along with a small API for user manipulation. 9 | ]], 10 | license = 'MIT/X11', 11 | homepage = 'http://leg.luaforge.net/', 12 | } 13 | 14 | -- Dependency information 15 | dependencies = { 16 | 'lpeg >= 0.6', 17 | 'lua >= 5.1', 18 | } 19 | 20 | -- Build rules 21 | source = { 22 | url = 'git://github.com/keplerproject/leg.git', 23 | } 24 | 25 | build = { 26 | type = 'make', 27 | install_variables = { 28 | LUA_LIB = "$(LUADIR)", 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/grammar.lua: -------------------------------------------------------------------------------- 1 | --[=[ 2 | <% 3 | project.title = "grammar" 4 | project.description = "LPeg grammar manipulation" 5 | project.version = "0.2" 6 | project.date = _G.os.date'%B %d, %Y' 7 | project.modules = { 'grammar', 'parser' } 8 | %> 9 | 10 | # Description 11 | 12 | This module defines a handful of operations which can be applied to 13 | [http://www.inf.puc-rio.br/~roberto/lpeg.html LPeg] patterns and grammars in 14 | general. 15 | 16 | # Dependencies 17 | 18 | * [http://www.inf.puc-rio.br/~roberto/lpeg.html LPeg]. 
19 | 20 | # Operations 21 | 22 | ## Piping 23 | 24 | Pattern matching dissociates the notion of *matching* from the notion of 25 | *capturing*: matching checks if a given string follows a certain pattern, 26 | and capturing generates values according to the match made. This division 27 | allows interesting possibilities: 28 | 29 | * different problems can be solved by applying different captures to the same grammar; 30 | * captures may be defined separately; 31 | * captures may be done on top of other captures. 32 | 33 | Accounting for the first and second bullets, the grammar given in 34 | [parser.html parser] has no captures, enabling the user to reuse it to solve any 35 | problems that require a Lua grammar. One good example is documentation 36 | generation, described in a little more detail [#section_Example below]. 37 | 38 | The third bullet depicts a more interesting idea: a capture might take the 39 | result of another capture as input, doing a further transformation of the 40 | original data. This capture chaining, with the latter ones using the former's 41 | output as its input, is very similar to [http://en.wikipedia.org/wiki/Pipeline_%28Unix%29 Unix pipelines], 42 | so this mechanism was named **piping**. 43 | 44 | ## Completing 45 | 46 | With piping, several levels of captures can be chained together up to the 47 | most appropriate for the task at hand. Yet some levels might require extra rules, and modifications to existing ones, to ensure proper matching. 48 | 49 | To avoid manual copying, the new grammar should redefine only the necessary 50 | rules, copying the rest from the older grammar. This action is dubbed 51 | **completing**. 52 | 53 | ## Applying 54 | 55 | Once a new rule set is created and [#section_Completing completed], and 56 | all captures are correctly [#section_Piping piped], all that's left is 57 | to put them together, a process called **applying**. 
The result is a grammar ready for [http://www.inf.puc-rio.br/~roberto/lpeg.html#lpeg lpeg.P] 58 | consumption, whose pattern will return the intended result when a match is made. 59 | 60 | ## Example 61 | 62 | Let's consider the problem of documenting a Lua module. In this case, comments 63 | must be captured before every function declaration when in the outermost scope: 64 | 65 | `` 66 | -- -- the code to parse 67 | subject = %[%[ 68 | -- -- Calculates the sum a+b. 69 | -- -- An extra line. 70 | function sum (a, b) 71 | -- -- code 72 | end 73 | 74 | -- -- f1: assume a variable assignment is not a proper declaration for an 75 | -- -- exported function 76 | f1 = function () 77 | -- -- code 78 | end 79 | 80 | while true do 81 | -- -- this function is not in the outermost scope 82 | function aux() end 83 | end 84 | 85 | function something:other(a, ...) 86 | -- -- a global function without comments 87 | end 88 | %]%] 89 | `` 90 | 91 | In the code above only `sum` and `something:other` should be documented, as `f1` isn't properly (by our standards) declared and `aux` is not in the outermost scope. 
92 | 93 | By combining [http://www.inf.puc-rio.br/~roberto/lpeg.html LPeg] and the modules [parser.html parser] and [grammar.html grammar], this specific problem can be solved as follows: 94 | 95 | `` 96 | -- -- ye olde imports 97 | local parser, grammar = require 'leg.parser', require 'leg.grammar' 98 | local lpeg = require 'lpeg' 99 | 100 | -- -- a little aliasing never hurt anyone 101 | local P, V = lpeg.P, lpeg.V 102 | 103 | -- -- change only the initial rule and make no captures 104 | patt = grammar.apply(parser.rules, parser.COMMENT^-1 %* V'GlobalFunction', nil) 105 | 106 | -- -- transform the new grammar into a LPeg pattern 107 | patt = P(patt) 108 | 109 | -- -- making a pattern that matches any Lua statement, also without captures 110 | Stat = P( grammar.apply(parser.rules, V'Stat', nil) ) 111 | 112 | -- -- a pattern which matches function declarations and skips statements in 113 | -- -- inner scopes or undesired tokens 114 | patt = (patt + Stat + parser.ANY)^0 115 | 116 | -- -- matching a string 117 | patt:match(subject) 118 | `` 119 | 120 | These are the relevant rules in [parser.html#section_The_Grammar the grammar]: 121 | 122 | `` 123 | GlobalFunction = 'function' %* FuncName %* FuncBody 124 | FuncName = ID %* ('.' %* ID)^0 %* (':' %* ID)^-1 125 | FuncBody = '(' %* (ParList + EPSILON) %* ')' %* Block %* 'end' 126 | ParList = NameList %* (',' %* '...')^-1 127 | NameList = ID %* (',' %* ID)^0 128 | ID = parser.IDENTIFIER 129 | EPSILON = P(true) 130 | `` 131 | 132 | It may seem that `ParList + EPSILON` could be substituted for `ParList^-1` (optionally match `ParList`), but then no captures would be made for empty parameter lists, and `GlobalFunction` would get all strings matched by `FuncBody`. The `EPSILON` rule acts in this manner as a placeholder in the argument list, avoiding any argument list processing in the capture function. 
133 | 134 | Since no captures are being made, [http://www.inf.puc-rio.br/~roberto/lpeg.html#basic lpeg.match] doesn't return anything interesting. Here are some possible captures: 135 | 136 | `` 137 | -- -- some interesting captures bundled up in a table. Note that the table keys 138 | -- -- match the grammar rules we want to add captures to. Whatever rules aren't in 139 | -- -- the rules table below will come from parser.rules . 140 | captures = { 141 | %[1%] = function (...) -- the initial rule 142 | return '<function>'..table.concat{...}..'</function>' 143 | end, 144 | 145 | GlobalFunction = function (name, parlist) 146 | return '<name>'..name..'</name><parlist>'..(parlist or '')..'</parlist>' 147 | end, 148 | 149 | FuncName = grammar.C, -- capture the raw text 150 | ParList = grammar.C, -- capture the raw text 151 | COMMENT = parser.comment2text, -- remove the comment trappings 152 | } 153 | 154 | -- -- spacing rule 155 | local S = parser.SPACE ^ 0 156 | 157 | -- -- rules table 158 | rules = { 159 | %[1%] = ((V'COMMENT' %*S) ^ 0) %*S%* V'GlobalFunction', 160 | COMMENT = parser.COMMENT, 161 | } 162 | 163 | -- -- building the new grammar and adding the captures 164 | patt = P( grammar.apply(parser.rules, rules, captures) ) 165 | 166 | -- -- a pattern that matches a sequence of patts and concatenates the results 167 | patt = (patt + Stat + parser.ANY)^0 / function(...) 168 | return table.concat({...}, '\n\n') -- some line breaks for easier reading 169 | end 170 | 171 | -- -- finally, matching a string 172 | print(patt:match(subject)) 173 | `` 174 | 175 | `FuncBody` needs no captures, as `Block` and all its non-terminals have none; it 176 | just needs to pass along any captures made by `ParList`. `NameList` and `ID` also have no captures, and the whole subject string is passed further. 177 | 178 | The printed result is: 179 |
180 | <function>Calculates the sum a+b. An extra line.<name>sum</name><parlist>a, b</parlist></function>
181 | 
182 | <function><name>something:other</name><parlist>a, ...</parlist></function> 183 |
184 | --]=] 185 | 186 | -- $Id: grammar.lua,v 1.4 2007/12/07 14:23:56 hanjos Exp $ 187 | 188 | -- basic modules 189 | local _G = _G 190 | local table = table 191 | 192 | -- basic functions 193 | local assert = assert 194 | local ipairs = ipairs 195 | local pairs = pairs 196 | local pcall = pcall 197 | local type = type 198 | local unpack = unpack 199 | 200 | -- imported modules 201 | local lpeg = require 'lpeg' 202 | 203 | -- imported functions 204 | local P, V = lpeg.P, lpeg.V 205 | 206 | -- module declaration 207 | local grammar = {} -- the leg.grammar module 208 | 209 | local anyOf, oneOf, listOf, anywhere, C, Ct, copy, complete, pipe, apply, pmatch 210 | 211 | --[[ 212 | Returns a pattern which matches any of the patterns in `t`. 213 | 214 | The iterator `pairs` is used to traverse `t`, so no particular traversal order 215 | is guaranteed. Use [#function_oneOf oneOf] to ensure sequential matching 216 | attempts. 217 | 218 | **Example:** 219 | `` 220 | local g, p, m = require 'leg.grammar', require 'leg.parser', require 'lpeg' 221 | 222 | -- -- match numbers or operators, capture the numbers 223 | print( (g.anyOf { '+', '-', '%*', '/', m.C(p.NUMBER) }):match '34.5@23 %* 56 / 45 - 45' ) 224 | -- --> prints 34.5 225 | `` 226 | 227 | **Parameters:** 228 | * `t`: a table with LPeg patterns as values. The keys are ignored. 229 | 230 | **Returns:** 231 | * a pattern which matches any of the patterns received. 232 | --]] 233 | function anyOf(t) 234 | local patt = P(false) 235 | 236 | for _, v in pairs(t) do 237 | patt = P(v) + patt 238 | end 239 | 240 | return patt 241 | end 242 | 243 | --[[ 244 | Returns a pattern which matches any of the patterns in `list`. 245 | 246 | Differently from [#function_anyOf anyOf], this function ensures sequential 247 | traversing. 248 | 249 | **Parameters:** 250 | * `list`: a list of LPeg patterns. 251 | 252 | **Returns:** 253 | * a pattern which matches any of the patterns received. 
254 | --]] 255 | function oneOf(list) 256 | local patt = P(false) 257 | 258 | for _, v in ipairs(list) do 259 | patt = P(v) + patt 260 | end 261 | 262 | return patt 263 | end 264 | 265 | --[=[ 266 | Returns a pattern which matches a list of `patt`s, separated by `sep`. 267 | 268 | **Example:** matching comma-separated values: 269 | `` 270 | local g, m = require 'leg.grammar', require 'lpeg' 271 | 272 | -- -- separator 273 | local sep = m.P',' + m.P'\n' 274 | 275 | -- -- element: anything but sep, capture it 276 | local elem = m.C((1 - sep)^0) 277 | 278 | -- -- pattern 279 | local patt = g.listOf(elem, sep) 280 | 281 | -- -- matching 282 | print( patt:match %[%[a, b, 'christmas eve' 283 | d, evening; mate! 284 | f%]%]) 285 | -- --> prints out "a b 'christmas eve' d evening; mate! f" 286 | `` 287 | 288 | **Parameters:** 289 | * `patt`: a LPeg pattern. 290 | * `sep`: a LPeg pattern. 291 | 292 | **Returns:** 293 | * the following pattern: ``patt %* (sep %* patt)^0`` 294 | --]=] 295 | function listOf(patt, sep) 296 | patt, sep = P(patt), P(sep) 297 | 298 | return patt * (sep * patt)^0 299 | end 300 | 301 | --[[ 302 | Returns a pattern which searches for the pattern `patt` anywhere in a string. 303 | 304 | This code was extracted from the [http://www.inf.puc-rio.br/~roberto/lpeg.html#ex LPeg home page], in the examples section. 305 | 306 | **Parameters:** 307 | * `patt`: a LPeg pattern. 308 | 309 | **Returns:** 310 | * a LPeg pattern which searches for `patt` anywhere in the string. 311 | --]] 312 | function anywhere(patt) 313 | return P { P(patt) + 1 * V(1) } 314 | end 315 | 316 | --[[ 317 | A capture function, made so that `patt / C` is equivalent to `m.C(patt)`. It's intended to be used in capture tables, such as those required by [#function_pipe pipe] and [#function_apply apply]. 318 | --]] 319 | function C(...) return ... end 320 | 321 | --[[ 322 | A capture function, made so that `patt / Ct` is equivalent to `m.Ct(patt)`. 
It's intended to be used in capture tables, such as those required by [#function_pipe pipe] and [#function_apply apply]. 323 | --]] 324 | function Ct(...) return { ... } end 325 | 326 | --[[ 327 | Creates a shallow copy of `grammar`. 328 | 329 | **Parameters:** 330 | * `grammar`: a regular table. 331 | 332 | **Returns:** 333 | * a newly created table, with `grammar`'s keys and values. 334 | --]] 335 | function copy(grammar) 336 | local newt = {} 337 | 338 | for k, v in pairs(grammar) do 339 | newt[k] = v 340 | end 341 | 342 | return newt 343 | end 344 | 345 | --[[ 346 | [#section_Completing Completes] `dest` with `orig`. 347 | 348 | **Parameters:** 349 | * `dest`: the new grammar. Must be a table. 350 | * `orig`: the original grammar. Must be a table. 351 | 352 | **Returns:** 353 | * `dest`, with new rules inherited from `orig`. 354 | --]] 355 | function complete (dest, orig) 356 | for rule, patt in pairs(orig) do 357 | if not dest[rule] then 358 | dest[rule] = patt 359 | end 360 | end 361 | 362 | return dest 363 | end 364 | 365 | --[[ 366 | [#section_Piping Pipes] the captures in `orig` to the ones in `dest`. 367 | 368 | `dest` and `orig` should be tables, with each key storing a capture function. Each capture in `dest` will be altered to use the results for the matching one in `orig` as input, using function composition. Should `orig` possess keys not in `dest`, `dest` will copy them. 369 | 370 | **Parameters:** 371 | * `dest`: a capture table. 372 | * `orig`: a capture table. 373 | 374 | **Returns:** 375 | * `dest`, suitably modified. 376 | --]] 377 | function pipe (dest, orig) 378 | for k, vorig in pairs(orig) do 379 | local vdest = dest[k] 380 | if vdest then 381 | dest[k] = function(...) return vdest(vorig(...)) end 382 | else 383 | dest[k] = vorig 384 | end 385 | end 386 | 387 | return dest 388 | end 389 | 390 | --[[ 391 | [#section_Completing Completes] `rules` with `grammar` and then [#Applying applies] `captures`. 
392 | 393 | `rules` can either be: 394 | * a single pattern, which is taken to be the new initial rule, 395 | * a possibly incomplete LPeg grammar table, as per [#function_complete complete], or 396 | * `nil`, which means no new rules are added. 397 | 398 | `captures` can either be: 399 | * a capture table, as per [#function_pipe pipe], or 400 | * `nil`, which means no captures are applied. 401 | 402 | **Parameters:** 403 | * `grammar`: the old grammar. It stays unmodified. 404 | * `rules`: optional, the new rules. 405 | * `captures`: optional, the final capture table. 406 | 407 | **Returns:** 408 | * `rules`, suitably augmented by `grammar` and `captures`. 409 | --]] 410 | function apply (grammar, rules, captures) 411 | if rules == nil then 412 | rules = {} 413 | elseif type(rules) ~= 'table' then 414 | rules = { rules } 415 | end 416 | 417 | complete(rules, grammar) 418 | 419 | if type(grammar[1]) == 'string' then 420 | rules[1] = V(grammar[1]) 421 | end 422 | 423 | if captures ~= nil then 424 | assert(type(captures) == 'table', 'captures must be a table') 425 | 426 | for rule, cap in pairs(captures) do 427 | rules[rule] = rules[rule] / cap 428 | end 429 | end 430 | 431 | return rules 432 | end 433 | 434 | --[[ 435 | Returns a pattern which simply fails to match if an error is thrown during the matching. 436 | 437 | One usage example is [parser.html#variable_NUMBER parser.NUMBER]. Originally it threw an error when trying to match a malformed number (such as 1e23e4), since in this case the input is obviously invalid and the pattern would be part of the Lua grammar. So [#function_pmatch pmatch] is used to catch the error and return `nil` (signalling a non-match) and the error message. 438 | 439 | **Parameters:** 440 | * `patt`: a LPeg pattern. 441 | 442 | **Returns:** 443 | * a pattern which catches any errors thrown during the matching and simply doesn't match instead of propagating the error. 
--]]
function pmatch(patt)
  local wrapped = P(patt)

  return P(function (subject, i)
    -- pcall collects the status flag plus either the match results or
    -- the error message
    local outcome = { pcall(wrapped.match, wrapped, subject, i) }
    local ok = table.remove(outcome, 1)

    if not ok then
      -- signal a non-match instead of propagating the error
      return nil, unpack(outcome)
    end

    return unpack(outcome)
  end)
end

-- the module's public interface
grammar.anyOf = anyOf
grammar.oneOf = oneOf
grammar.listOf = listOf
grammar.anywhere = anywhere
grammar.C = C
grammar.Ct = Ct
grammar.copy = copy
grammar.complete = complete
grammar.pipe = pipe
grammar.apply = apply
grammar.pmatch = pmatch

return grammar
--------------------------------------------------------------------------------
/src/init.lua:
--------------------------------------------------------------------------------
--[[
<%
project.title = "Leg"
project.description = "LPeg-powered Lua 5.1 grammar"
project.version = "0.2"
project.date = _G.os.date'%B %d, %Y'
project.modules = { 'grammar', 'parser' }
%>

# Overview

Leg is a Lua library which offers a complete Lua 5.1 grammar, along with some functions to use and modify it. Some examples of projects which could benefit from Leg are a syntax highlighter, a Luadoc-style document generator, and a macro preprocessor.

Leg uses [http://www.inf.puc-rio.br/~roberto/lpeg.html LPeg] for pattern matching, and returns [http://www.inf.puc-rio.br/~roberto/lpeg.html LPeg] patterns for user manipulation.

Leg is available under the same [#section_License license] as Lua 5.1.

# Dependencies

* [http://www.inf.puc-rio.br/~roberto/lpeg.html LPeg]

# Download

Leg can be downloaded from its [http://luaforge.net/projects/leg/ LuaForge page].

# Credits

This project is maintained by Humberto Anjos, and was adapted from an earlier project done with Francisco Sant'Anna.
29 | 30 | # License 31 | 32 | Copyright © 2007 Humberto Saraiva Nazareno dos Anjos. 33 | 34 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 35 | 36 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 37 | 38 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
--]]


-- $Id: init.lua,v 1.3 2007/12/07 14:23:56 hanjos Exp $

-- Load the two public submodules and expose them under one namespace table.
local grammar = require 'leg.grammar'
local parser = require 'leg.parser'

return {
  grammar = grammar,
  parser = parser,
}

--------------------------------------------------------------------------------
/src/parser.lua:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keplerproject/leg/dcd25ff58eab815cb158afcf4cc541281bc36408/src/parser.lua
--------------------------------------------------------------------------------
/tests/test.lua:
--------------------------------------------------------------------------------
-------------------------------------------------------------------------------
-- A test suite for Leg
--
-- Author: Humberto Anjos
-- Copyright (c) 2007 Leg
--
-- $Id: test.lua,v 1.4 2007/12/07 14:23:56 hanjos Exp $
--
-------------------------------------------------------------------------------

-- run the parser test suite, which must live alongside this script
print('==================== PARSER =====================')
dofile('test_parser.lua')
print()
print('All done!')
--------------------------------------------------------------------------------
/tests/test_parser.lua:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keplerproject/leg/dcd25ff58eab815cb158afcf4cc541281bc36408/tests/test_parser.lua
--------------------------------------------------------------------------------