├── ABOUT
├── CHANGELOG.md
├── LICENSE
├── README.md
├── TODO.md
├── doc
    └── USAGE.md
├── src
    ├── lpcap.lua
    ├── lpcode.lua
    ├── lpeglj.lua
    ├── lpprint.lua
    ├── lpvm.lua
    └── re.lua
└── tests
    ├── loadtest.lua
    ├── streamtest.lua
    ├── streamtest2.lua
    ├── test.lua
    └── testlr.lua


/ABOUT:
--------------------------------------------------------------------------------
 1 | LPeg Parser in LuaJIT
 2 | based on LPeg v1.0 - PEG pattern matching for Lua
 3 | Lua.org & PUC-Rio  written by Roberto Ierusalimschy
 4 | http://www.inf.puc-rio.br/~roberto/lpeg/
 5 | 
 6 | left recursion support based on Sérgio Medeiros algorithm
 7 | http://arxiv.org/abs/1207.0443
 8 | 
 9 | The re.lua and the test.lua are taken from
10 | original LPeg distribution.
11 | 
12 | Released under MIT License
13 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | 1.0.0.0 : September 30, 2015
 2 | - include changes and bug fixes from LPeg v1.0 
 3 | - added VM runtime listing (tracing) for debugging purposes
 4 | 
 5 | 0.12.2 : July 10, 2014
 6 | 
 7 | - added restricted memoization
 8 | - stream support (infinite parsing)
 9 | 
10 | 0.12.1 : December 30, 2013
11 | 
12 | - speed improvement
13 | - support direct and indirect left recursion based on Sérgio Medeiros algorithm (http://arxiv.org/abs/1207.0443)
14 | - loading and saving patterns
15 | 
16 | 0.12 : July 14, 2013: Initial release
17 | 
18 | - LPeg Parser in pure LuaJIT based on LPeg v.12


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | ** Modules:
 2 | ** lpcap.lua, lpcode.lua, lpeglj.lua, lpprint.lua, lpvm.lua
 3 | ** testlr.lua
 4 | ** Copyright (C) 2014 Rostislav Sacek.
 5 | **
 6 | ** Modules:
 7 | ** re.lua, test.lua
 8 | ** Copyright (C) 2013 Lua.org, PUC-Rio.
 9 | **
10 | ** Licence:
11 | ** Permission is hereby granted, free of charge, to any person obtaining
12 | ** a copy of this software and associated documentation files (the
13 | ** "Software"), to deal in the Software without restriction, including
14 | ** without limitation the rights to use, copy, modify, merge, publish,
15 | ** distribute, sublicense, and/or sell copies of the Software, and to
16 | ** permit persons to whom the Software is furnished to do so, subject to
17 | ** the following conditions:
18 | **
19 | ** The above copyright notice and this permission notice shall be
20 | ** included in all copies or substantial portions of the Software.
21 | **
22 | ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 | ** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 | ** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25 | ** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
26 | ** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
27 | ** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
28 | ** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 | **
30 | ** [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
31 | 
32 | -----------------------------------------------------------------------------
33 | 
34 | 
35 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | LPegLJ v1.0
 2 | =============
 3 | 
 4 | LPeg Parser in pure LuaJIT  
 5 | (straight Lua + FFI translation of LPeg C code)   
 6 | based on LPeg v1.0 - PEG pattern matching for Lua  
 7 | Lua.org & PUC-Rio  written by Roberto Ierusalimschy  
 8 | http://www.inf.puc-rio.br/~roberto/lpeg/
 9 | 
10 | left recursion support is based on Sérgio Medeiros algorithm
11 | http://arxiv.org/abs/1207.0443
12 | 
13 | ### Usage:  
14 | ```Lua
15 | local lpeglj = require"lpeglj"  
16 | local pattern = lpeglj.P("a") 
17 | -- then:
18 | lpeglj.match(pattern, "a") 
19 | -- or, equivalently:  
20 | pattern:match("a")  
21 | ```
22 | 
23 | ### Compatibility:
24 | 
25 | - full syntactical and functional backward compatibility with LPeg v1.0
26 | - works only with LuaJIT 2.x  
27 | 
28 | ### Differences from LPeg v1.0:
29 | 
30 | Description in doc/USAGE.md
31 | 
32 | - LPegLJ supports direct and indirect left recursion based on Sérgio Medeiros algorithm (http://arxiv.org/abs/1207.0443)
33 | - patterns can be saved and loaded
34 | - supports memoization (restricted) - useful for complex grammars
35 | - can be used in stream mode (infinite parsing)
36 | - VM action runtime listing (tracing) for debugging purposes
37 | 


--------------------------------------------------------------------------------
/TODO.md:
--------------------------------------------------------------------------------
 1 | ### Grammar checking
 2 | #### Check PEG and left recursion rules for right order.
 3 | 
 4 | E <- ('a' / 'aa') 'b'
 5 | 
 6 | E <- 'a' / E 'a'
 7 | 
 8 | ### Left factorization
 9 | 
10 | ### Runtime capture
11 | Add commit and revert functions (for creating tables via Cmt).
12 | 
13 | ### Rule profiling
14 | 
15 | ### Performance tests
16 | 
17 | 
18 | 


--------------------------------------------------------------------------------
/doc/USAGE.md:
--------------------------------------------------------------------------------
  1 | LPegLJ 1.0.0.0LJ
  2 | ===========
  3 | ## New functions:
  4 | ###Loading and saving patterns:
  5 | ####pat:save(fname, [tree])
  6 | Save pattern to file.
  7 | 
  8 | fname - file name for pattern
  9 | 
 10 | tree - full pattern tree is saved - later modification is possible
 11 | ####pat:dump([tree])
 12 | Dump pattern to string.
 13 |  
 14 | tree - full pattern tree is saved - later modification is possible
 15 | ####lpeg.loadfile(fname, [fsymbols])
 16 | Load pattern from file.
 17 | 
 18 | fname - file name with pattern
 19 | 
 20 | fsymbols - table with functions (key - symbolic name, value - function). This should be used only for functions with upvalues. 
 21 | 
 22 | ####lpeg.load(str, [fsymbols])
 23 | Load pattern from memory.
 24 | 
 25 | str - pattern in memory (string or ffi type)
 26 | 
 27 | fsymbols - table with functions (key - symbolic name, value - function). This should be used only for functions with upvalues.
 28 | 
 29 | ###Example:
 30 | ```Lua
 31 | local lpeglj = require"lpeglj"
 32 | local pat = lpeglj.P('abc')
 33 | pat:save("saved.pat")  -- save only pattern code
 34 | local savedpat = lpeglj.loadfile("saved.pat")
 35 | ```
 36 | ###Left recursion:
 37 | ####lpeglj.enableleftrecursion(set)
 38 | *set* - enable left recursion
 39 | ####lpeglj.V(v, p)
 40 | *p* - precedence level (number 1 to n)
 41 | ###Example:
 42 | ```Lua
 43 | local lpeglj = require"lpeglj"
 44 | lpeglj.enableleftrecursion(true)
 45 | local pat = m.P{
 46 |     "E",
 47 |     E = lpeglj.V("E", 1) * '+' * lpeglj.V("E", 2) +   -- left associative rule with low precedence
 48 |      lpeglj.V("E", 2) * '**' * lpeglj.V("E", 2) +     -- right associative rule with higher precedence
 49 |     'n'
 50 |     }
 51 | pat:match("n+n+n")
 52 | ```
 53 | ####using re module with precedence
 54 | ```Lua
 55 | local lpeglj = require"lpeglj"
 56 | local re = require"re"
 57 | lpeglj.enableleftrecursion(true)
 58 | local pat = [[
 59 |      E <- E:1 [+-] E:2 / -- left associativity
 60 |           E:2 [*/] E:3 /
 61 |           E:3 '**' E:3 / -- right associativity
 62 |           '-' E:4 /      -- highest precedence
 63 |           '(' E ')' /
 64 |           [0-9]+
 65 | ]]
 66 | re.match("-1*(6+2/4+3-1)**2", pat)
 67 | ```
 68 | ###Using memoization:
 69 | ####lpeglj.enablememoization(set)
 70 | *set* - enable memoization (true or false)
 71 | 
 72 | ###Using stream:
 73 | 
 74 | In stream mode all input data are copied into internal buffers. During parsing algorithm discards unused buffer (without link from stack or from captures stack).
 75 | Captures are generated and removed from capture stack in this condition: capture are not in unsolved alternative and capture is not open (should be complete). 
 76 | Algorithm generates only complete capture on highest level. Nested captures are generated after higher level captures are completed. 
 77 | 
 78 | ####lpeglj.streammatch(pat, init, ...)
 79 | *pat* - pattern   
 80 | *init* - start position in stream (should be positive number)  
 81 | *...* - another parameters (same as in lpeg.match function)  
 82 | 
 83 | Returns function **func**. This function is called with string data from stream.    
 84 |   
 85 | ####func(str, eos)
 86 | *str* - string input (string)  
 87 | *eos* - end of stream (boolean)  
 88 | Returns **status** and capture(s)(if available) or position.     
 89 | 
 90 | **Status**:  
 91 |  1 - need another data   
 92 | -1 - parsing fail  
 93 |  0 - parsing finished    
 94 | 
 95 | Restrictions and differences for stream mode:  
 96 | 
 97 | - start position in stream should be positive number.
 98 | - whole string argument in match-time captures (Cmt and function) is not string but function.
 99 |   This function takes two arguments (start and end index of string in stream) and return string. 
100 |  
101 | ###Example:
102 | ```Lua
103 | local lpeglj = require"lpeglj"
104 | local pat = m.C("abc") * m.C("def")
105 | local fce = pat:streammatch()
106 | local st = fce("ab") -- return 1 - need another data
107 | local st, cap = fce("c") -- return 1 , "abc"  - capture and need another data
108 | local st, cap = fce("def") -- return 0 , "def"  - capture and finish parsing
109 | ```
110 | 
111 | ####lpeglj.setmaxbehind(val)
112 | *val* - max position before current position (number or nil for reset)
113 | 
114 | Function sets maximum position before current position. Buffer with this position can not be deleted.
115 | This function has meaning only for match-time captures which use first string argument. In this case 
116 | algorithm can not determinate range of requested string.       
117 | 
118 | #### re module
119 | 
120 | ####re.streammatch (pat, init)
121 | *pat* - pattern   
122 | *init* - start position in stream (should be positive number)  
123 | 
124 | Returns function **func**. This function is called with string data from stream.    
125 |   
126 | ####func(str, eos)
127 | *str* - string input (string)  
128 | *eos* - end of stream (boolean)  
129 | Returns **status** and captures or position.     
130 | 
131 | **Status**:  
132 |  1 - need another data   
133 | -1 - parsing fail  
134 |  0 - parsing finished    
135 | 
136 | ###Runtime tracing:  
137 | ####lpeg.enabletracing(set)  
138 | *set* - enable tracing (true or false)   
139 | 
140 | **Output format:**  
141 | ####Rule entry:  
142 | indent '+'[typ] rulename  
143 | 
144 | *indent* - nesting level  
145 | *typ* - type of call  
146 | - 'M' - memoized rule  
147 | - 'TC' - tail call  
148 | *rulename* - name of rule  
149 | 
150 | ####Rule match:  
151 | indent '='[typ] funcname [extra] subject [captures]  
152 | 
153 | *indent* - nesting level  
154 | *typ* - type of call  
155 | - 'M' - memoized rule  
156 | - 'IB' - increment bound (for left recursion)  
157 | *extra* - additional info for left recursion - level of IB  
158 | *subject* - corresponding part of input string (or stream)  
159 | *captures* - corresponding part of runtime captures   
160 | 
161 | ####Rule leave (fail):  
162 | indent '-' rulename  
163 | 
164 | *indent* - nesting level  
165 | *rulename* - name of rule  
166 | 


--------------------------------------------------------------------------------
/src/lpcap.lua:
--------------------------------------------------------------------------------
  1 | --[[
  2 | LPEGLJ
  3 | lpcap.lua
  4 | Capture functions
  5 | Copyright (C) 2014 Rostislav Sacek.
  6 | based on LPeg v1.0 - PEG pattern matching for Lua
  7 | Lua.org & PUC-Rio  written by Roberto Ierusalimschy
  8 | http://www.inf.puc-rio.br/~roberto/lpeg/
  9 | 
 10 | ** Permission is hereby granted, free of charge, to any person obtaining
 11 | ** a copy of this software and associated documentation files (the
 12 | ** "Software"), to deal in the Software without restriction, including
 13 | ** without limitation the rights to use, copy, modify, merge, publish,
 14 | ** distribute, sublicense, and/or sell copies of the Software, and to
 15 | ** permit persons to whom the Software is furnished to do so, subject to
 16 | ** the following conditions:
 17 | **
 18 | ** The above copyright notice and this permission notice shall be
 19 | ** included in all copies or substantial portions of the Software.
 20 | **
 21 | ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 22 | ** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 23 | ** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 24 | ** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 25 | ** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 26 | ** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 27 | ** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 28 | **
 29 | ** [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
 30 | --]]
 31 | local ffi = require "ffi"
 32 | 
 33 | local Cclose = 0
 34 | local Cposition = 1
 35 | local Cconst = 2
 36 | local Cbackref = 3
 37 | local Carg = 4
 38 | local Csimple = 5
 39 | local Ctable = 6
 40 | local Cfunction = 7
 41 | local Cquery = 8
 42 | local Cstring = 9
 43 | local Cnum = 10
 44 | local Csubst = 11
 45 | local Cfold = 12
 46 | local Cruntime = 13
 47 | local Cgroup = 14
 48 | 
 49 | local MAXSTRCAPS = 10
 50 | 
 51 | local pushcapture
 52 | local addonestring
 53 | 
 54 | 
 55 | -- Goes back in a list of captures looking for an open capture
 56 | -- corresponding to a close
 57 | 
 58 | local function findopen(cs, index)
 59 |     local n = 0; -- number of closes waiting an open
 60 |     while true do
 61 |         index = index - 1
 62 |         if cs.ocap[index].kind == Cclose then
 63 |             n = n + 1 -- one more open to skip
 64 |         elseif cs.ocap[index].siz == 0 then
 65 |             if n == 0 then
 66 |                 return index
 67 |             end
 68 |             n = n - 1
 69 |         end
 70 |     end
 71 | end
 72 | 
 73 | 
 74 | local function checknextcap(cs, captop)
 75 |     local cap = cs.cap;
 76 |     -- not a single capture?    ((cap)->siz != 0)
 77 |     if cs.ocap[cap].siz == 0 then
 78 |         local n = 0; -- number of opens waiting a close
 79 |         -- look for corresponding close
 80 |         while true do
 81 |             cap = cap + 1
 82 |             if cap > captop then return end
 83 |             if cs.ocap[cap].kind == Cclose then
 84 |                 n = n - 1
 85 |                 if n + 1 == 0 then
 86 |                     break;
 87 |                 end
 88 |             elseif cs.ocap[cap].siz == 0 then
 89 |                 n = n + 1
 90 |             end
 91 |         end
 92 |     end
 93 |     cap = cap + 1; -- + 1 to skip last close (or entire single capture)
 94 |     if cap > captop then return end
 95 |     return true
 96 | end
 97 | 
 98 | 
 99 | -- Go to the next capture
100 | 
101 | local function nextcap(cs)
102 |     local cap = cs.cap;
103 |     -- not a single capture?    ((cap)->siz != 0)
104 |     if cs.ocap[cap].siz == 0 then
105 |         local n = 0; -- number of opens waiting a close
106 |         -- look for corresponding close
107 |         while true do
108 |             cap = cap + 1
109 |             if cs.ocap[cap].kind == Cclose then
110 |                 n = n - 1
111 |                 if n + 1 == 0 then
112 |                     break;
113 |                 end
114 |             elseif cs.ocap[cap].siz == 0 then
115 |                 n = n + 1
116 |             end
117 |         end
118 |     end
119 |     cs.cap = cap + 1; -- + 1 to skip last close (or entire single capture)
120 | end
121 | 
122 | 
123 | -- Push on the Lua stack all values generated by nested captures inside
124 | -- the current capture. Returns number of values pushed. 'addextra'
125 | -- makes it push the entire match after all captured values. The
126 | -- entire match is pushed also if there are no other nested values,
127 | -- so the function never returns zero.
128 | 
129 | local function pushnestedvalues(cs, addextra, out, valuetable)
130 |     local co = cs.cap
131 |     cs.cap = cs.cap + 1
132 |     -- no nested captures?
133 |     if cs.ocap[cs.cap - 1].siz ~= 0 then
134 |         local st = cs.ocap[co].s
135 |         local l = cs.ocap[co].siz - 1
136 |         out.outindex = out.outindex + 1
137 |         out.out[out.outindex] = cs.s and cs.s:sub(st, st + l - 1) or cs.stream(st, st + l - 1)
138 |         return 1; -- that is it
139 |     else
140 |         local n = 0;
141 |         while cs.ocap[cs.cap].kind ~= Cclose do -- repeat for all nested patterns
142 |         n = n + pushcapture(cs, out, valuetable);
143 |         end
144 |         -- need extra?
145 |         if addextra or n == 0 then
146 |             local st = cs.ocap[co].s
147 |             local l = cs.ocap[cs.cap].s - cs.ocap[co].s
148 |             out.outindex = out.outindex + 1
149 |             out.out[out.outindex] = cs.s and cs.s:sub(st, st + l - 1) or cs.stream(st, st + l - 1)
150 |             n = n + 1
151 |         end
152 |         cs.cap = cs.cap + 1 -- skip close entry
153 |         return n;
154 |     end
155 | end
156 | 
157 | 
158 | -- Push only the first value generated by nested captures
159 | 
160 | local function pushonenestedvalue(cs, out, valuetable)
161 |     local n = pushnestedvalues(cs, false, out, valuetable)
162 |     for i = n, 2, -1 do
163 |         out.out[out.outindex] = nil
164 |         out.outindex = out.outindex - 1
165 |     end
166 | end
167 | 
168 | 
169 | -- Try to find a named group capture with the name given at the top of
170 | -- the stack; goes backward from 'cap'.
171 | 
172 | local function findback(cs, cap, name, valuetable)
173 |     -- repeat until end of list
174 |     while cap > 0 do
175 |         cap = cap - 1
176 |         local continue
177 |         if cs.ocap[cap].kind == Cclose then
178 |             cap = findopen(cs, cap); -- skip nested captures
179 |         elseif cs.ocap[cap].siz == 0 then
180 |             continue = true -- opening an enclosing capture: skip and get previous
181 |         end
182 |         if not continue and cs.ocap[cap].kind == Cgroup and cs.ocap[cap].idx ~= 0 then
183 |             local gname = valuetable[cs.ocap[cap].idx] -- get group name
184 |             -- right group?
185 |             if name == gname then
186 |                 return cap;
187 |             end
188 |         end
189 |     end
190 |     error(("back reference '%s' not found"):format(name), 0)
191 | end
192 | 
193 | 
194 | -- Back-reference capture. Return number of values pushed.
195 | 
196 | local function backrefcap(cs, out, valuetable)
197 |     local curr = cs.cap;
198 |     local name = valuetable[cs.ocap[cs.cap].idx] -- reference name
199 |     cs.cap = findback(cs, curr, name, valuetable) -- find corresponding group
200 |     local n = pushnestedvalues(cs, false, out, valuetable); -- push group's values
201 |     cs.cap = curr + 1;
202 |     return n;
203 | end
204 | 
205 | 
206 | -- Table capture: creates a new table and populates it with nested
207 | -- captures.
208 | 
209 | local function tablecap(cs, out, valuetable)
210 |     local n = 0;
211 |     local t = {}
212 |     cs.cap = cs.cap + 1
213 |     -- table is empty
214 |     if cs.ocap[cs.cap - 1].siz == 0 then
215 |         while cs.ocap[cs.cap].kind ~= Cclose do
216 |             local subout = { outindex = 0, out = {} }
217 |             -- named group?
218 |             if cs.ocap[cs.cap].kind == Cgroup and cs.ocap[cs.cap].idx ~= 0 then
219 |                 local groupname = valuetable[cs.ocap[cs.cap].idx] -- push group name
220 |                 pushonenestedvalue(cs, subout, valuetable)
221 |                 t[groupname] = subout.out[1]
222 |             else
223 |                 -- not a named group
224 |                 local k = pushcapture(cs, subout, valuetable)
225 |                 -- store all values into table
226 |                 for i = 1, subout.outindex do
227 |                     t[i + n] = subout.out[i]
228 |                 end
229 |                 n = n + k;
230 |             end
231 |         end
232 |         cs.cap = cs.cap + 1 -- skip close entry
233 |     end
234 |     out.outindex = out.outindex + 1
235 |     out.out[out.outindex] = t
236 |     return 1; -- number of values pushed (only the table)
237 | end
238 | 
239 | 
240 | -- Table-query capture
241 | 
242 | local function querycap(cs, out, valuetable)
243 |     local table = valuetable[cs.ocap[cs.cap].idx]
244 |     local subout = { outindex = 0, out = {} }
245 |     pushonenestedvalue(cs, subout, valuetable) -- get nested capture
246 |     -- query cap. value at table
247 |     if table[subout.out[1]] ~= nil then
248 |         out.outindex = out.outindex + 1
249 |         out.out[out.outindex] = table[subout.out[1]]
250 |         return 1
251 |     end
252 |     return 0
253 | end
254 | 
255 | 
256 | -- Fold capture
257 | 
258 | local function foldcap(cs, out, valuetable)
259 |     local fce = valuetable[cs.ocap[cs.cap].idx]
260 |     cs.cap = cs.cap + 1
261 |     -- no nested captures?
262 |     -- or no nested captures (large subject)?
263 |     if cs.ocap[cs.cap - 1].siz ~= 0 or
264 |             cs.ocap[cs.cap].kind == Cclose then
265 |         error("no initial value for fold capture", 0);
266 |     end
267 |     local subout = { outindex = 0; out = {} }
268 |     local n = pushcapture(cs, subout, valuetable) -- nested captures with no values?
269 |     if n == 0 then
270 |         error("no initial value for fold capture", 0);
271 |     end
272 |     local acumulator = subout.out[1] -- leave only one result for accumulator
273 |     while cs.ocap[cs.cap].kind ~= Cclose do
274 |         local subout = { outindex = 0; out = {} }
275 |         n = pushcapture(cs, subout, valuetable); -- get next capture's values
276 |         acumulator = fce(acumulator, unpack(subout.out, 1, subout.outindex)) -- call folding function
277 |     end
278 |     cs.cap = cs.cap + 1; -- skip close entry
279 |     out.outindex = out.outindex + 1
280 |     out.out[out.outindex] = acumulator
281 |     return 1; -- only accumulator left on the stack
282 | end
283 | 
284 | 
285 | local function retcount(...)
286 |     return select('#', ...), { ... }
287 | end
288 | 
289 | 
290 | -- Function capture
291 | 
292 | local function functioncap(cs, out, valuetable)
293 |     local fce = valuetable[cs.ocap[cs.cap].idx] --  push function
294 |     local subout = { outindex = 0, out = {} }
295 |     local n = pushnestedvalues(cs, false, subout, valuetable); -- push nested captures
296 |     local count, ret = retcount(fce(unpack(subout.out, 1, n))) -- call function
297 |     for i = 1, count do
298 |         out.outindex = out.outindex + 1
299 |         out.out[out.outindex] = ret[i]
300 |     end
301 |     return count
302 | end
303 | 
304 | 
305 | -- Select capture
306 | 
307 | local function numcap(cs, out, valuetable)
308 |     local idx = valuetable[cs.ocap[cs.cap].idx] -- value to select
309 |     -- no values?
310 |     if idx == 0 then
311 |         nextcap(cs); -- skip entire capture
312 |         return 0; -- no value produced
313 |     else
314 |         local subout = { outindex = 0, out = {} }
315 |         local n = pushnestedvalues(cs, false, subout, valuetable)
316 |         -- invalid index?
317 |         if n < idx then
318 |             error(("no capture '%d'"):format(idx), 0)
319 |         else
320 |             out.outindex = out.outindex + 1
321 |             out.out[out.outindex] = subout.out[idx] -- get selected capture
322 |             return 1;
323 |         end
324 |     end
325 | end
326 | 
327 | 
328 | -- Calls a runtime capture. Returns number of captures removed by
329 | -- the call, including the initial Cgroup. (Captures to be added are
330 | -- on the Lua stack.)
331 | 
332 | local function runtimecap(cs, close, s, out, valuetable)
333 |     local open = findopen(cs, close)
334 |     assert(cs.ocap[open].kind == Cgroup)
335 |     cs.ocap[close].kind = Cclose; -- closes the group
336 |     cs.ocap[close].s = s;
337 |     cs.cap = open;
338 |     local fce = valuetable[cs.ocap[cs.cap].idx] -- push function to be called
339 |     local subout = { outindex = 0, out = {} }
340 |     local n = pushnestedvalues(cs, false, subout, valuetable); -- push nested captures
341 |     local count, ret = retcount(fce(cs.s or cs.stream, s, unpack(subout.out, 1, n))) -- call dynamic function
342 |     for i = 1, count do
343 |         out.outindex = out.outindex + 1
344 |         out.out[out.outindex] = ret[i]
345 |     end
346 |     return close - open -- number of captures of all kinds removed
347 | end
348 | 
349 | -- Collect values from current capture into array 'cps'. Current
350 | -- capture must be Cstring (first call) or Csimple (recursive calls).
351 | -- (In first call, fills %0 with whole match for Cstring.)
352 | -- Returns number of elements in the array that were filled.
353 | 
354 | local function getstrcaps(cs, cps, n)
355 |     local k = n
356 |     n = n + 1
357 |     cps[k + 1].isstring = true; -- get string value
358 |     cps[k + 1].startstr = cs.ocap[cs.cap].s; -- starts here
359 |     cs.cap = cs.cap + 1
360 |     -- nested captures?
361 |     if cs.ocap[cs.cap - 1].siz == 0 then
362 |         -- traverse them
363 |         while cs.ocap[cs.cap].kind ~= Cclose do
364 |             -- too many captures?
365 |             if n >= MAXSTRCAPS then
366 |                 nextcap(cs); -- skip extra captures (will not need them)
367 |             elseif cs.ocap[cs.cap].kind == Csimple then
368 |                 -- string?
369 |                 n = getstrcaps(cs, cps, n); -- put info. into array
370 |             else
371 |                 cps[n + 1].isstring = false; -- not a string
372 |                 cps[n + 1].origcap = cs.cap; -- keep original capture
373 |                 nextcap(cs);
374 |                 n = n + 1;
375 |             end
376 |         end
377 |         cs.cap = cs.cap + 1 -- skip close
378 |     end
379 |     cps[k + 1].endstr = cs.ocap[cs.cap - 1].s + cs.ocap[cs.cap - 1].siz - 1 -- ends here
380 |     return n;
381 | end
382 | 
383 | 
384 | -- add next capture value (which should be a string) to buffer 'b'
385 | 
386 | -- String capture: add result to buffer 'b' (instead of pushing
387 | -- it into the stack)
388 | 
389 | local function stringcap(cs, b, valuetable)
390 |     local cps = {}
391 |     for i = 1, MAXSTRCAPS do
392 |         cps[#cps + 1] = {}
393 |     end
394 |     local fmt = valuetable[cs.ocap[cs.cap].idx]
395 |     local n = getstrcaps(cs, cps, 0) - 1; -- collect nested captures
396 |     local i = 1
397 |     -- traverse them
398 |     while i <= #fmt do
399 |         local c = fmt:sub(i, i)
400 |         -- not an escape?
401 |         if c ~= '%' then
402 |             b[#b + 1] = c -- add it to buffer
403 |         elseif fmt:sub(i + 1, i + 1) < '0' or fmt:sub(i + 1, i + 1) > '9' then
404 |             -- not followed by a digit?
405 |             i = i + 1
406 |             b[#b + 1] = fmt:sub(i, i)
407 |         else
408 |             i = i + 1
409 |             local l = fmt:sub(i, i) - '0'; -- capture index
410 |             if l > n then
411 |                 error(("invalid capture index (%d)"):format(l), 0)
412 |             elseif cps[l + 1].isstring then
413 |                 b[#b + 1] = cs.s and cs.s:sub(cps[l + 1].startstr, cps[l + 1].endstr - cps[l + 1].startstr + cps[l + 1].startstr - 1) or
414 |                         cs.stream(cps[l + 1].startstr, cps[l + 1].endstr - cps[l + 1].startstr + cps[l + 1].startstr - 1)
415 |             else
416 |                 local curr = cs.cap;
417 |                 cs.cap = cps[l + 1].origcap; -- go back to evaluate that nested capture
418 |                 if not addonestring(cs, b, "capture", valuetable) then
419 |                     error(("no values in capture index %d"):format(l), 0)
420 |                 end
421 |                 cs.cap = curr; -- continue from where it stopped
422 |             end
423 |         end
424 |         i = i + 1
425 |     end
426 | end
427 | 
428 | 
429 | -- Substitution capture: add result to buffer 'b'
430 | 
431 | local function substcap(cs, b, valuetable)
432 |     local curr = cs.ocap[cs.cap].s;
433 |     -- no nested captures?
434 |     if cs.ocap[cs.cap].siz ~= 0 then
435 |         -- keep original text
436 |         b[#b + 1] = cs.s and cs.s:sub(curr, cs.ocap[cs.cap].siz - 1 + curr - 1) or
437 |                 cs.stream(curr, cs.ocap[cs.cap].siz - 1 + curr - 1)
438 |     else
439 |         cs.cap = cs.cap + 1 -- skip open entry
440 |         -- traverse nested captures
441 |         while cs.ocap[cs.cap].kind ~= Cclose do
442 |             local next = cs.ocap[cs.cap].s;
443 |             b[#b + 1] = cs.s and cs.s:sub(curr, next - curr + curr - 1) or
444 |                     cs.stream(curr, next - curr + curr - 1) -- add text up to capture
445 |             if addonestring(cs, b, "replacement", valuetable) then
446 |                 curr = cs.ocap[cs.cap - 1].s + cs.ocap[cs.cap - 1].siz - 1; -- continue after match
447 |             else
448 |                 -- no capture value
449 |                 curr = next; -- keep original text in final result
450 |             end
451 |         end
452 |         b[#b + 1] = cs.s and cs.s:sub(curr, curr + cs.ocap[cs.cap].s - curr - 1) or
453 |                 cs.stream(curr, curr + cs.ocap[cs.cap].s - curr - 1) -- add last piece of text
454 |     end
455 |     cs.cap = cs.cap + 1 -- go to next capture
456 | end
457 | 
458 | 
459 | -- Evaluates a capture and adds its first value to buffer 'b'; returns
460 | -- whether there was a value
461 | 
462 | function addonestring(cs, b, what, valuetable)
463 |     local tag = cs.ocap[cs.cap].kind
464 |     if tag == Cstring then
465 |         stringcap(cs, b, valuetable); -- add capture directly to buffer
466 |         return 1
467 |     elseif tag == Csubst then
468 |         substcap(cs, b, valuetable); -- add capture directly to buffer
469 |         return 1
470 |     else
471 |         local subout = { outindex = 0, out = {} }
472 |         local n = pushcapture(cs, subout, valuetable);
473 |         if n > 0 then
474 |             if type(subout.out[1]) ~= 'string' and type(subout.out[1]) ~= 'number' then
475 |                 error(("invalid %s value (a %s)"):format(what, type(subout.out[1])), 0)
476 |             end
477 |             b[#b + 1] = subout.out[1]
478 |             return n
479 |         end
480 |     end
481 | end
482 | 
483 | 
484 | -- Push all values of the current capture into the stack; returns
485 | -- number of values pushed
486 | 
487 | function pushcapture(cs, out, valuetable)
488 |     local type = cs.ocap[cs.cap].kind
489 |     if type == Cposition then
490 |         out.outindex = out.outindex + 1
491 |         out.out[out.outindex] = cs.ocap[cs.cap].s
492 |         cs.cap = cs.cap + 1;
493 |         return 1;
494 |     elseif type == Cconst then
495 |         out.outindex = out.outindex + 1
496 |         out.out[out.outindex] = valuetable[cs.ocap[cs.cap].idx]
497 |         cs.cap = cs.cap + 1
498 |         return 1;
499 |     elseif type == Carg then
500 |         local arg = valuetable[cs.ocap[cs.cap].idx]
501 |         cs.cap = cs.cap + 1
502 |         if arg > cs.ptopcount then
503 |             error(("reference to absent extra argument #%d"):format(arg), 0)
504 |         end
505 |         out.outindex = out.outindex + 1
506 |         out.out[out.outindex] = cs.ptop[arg]
507 |         return 1;
508 |     elseif type == Csimple then
509 |         local k = pushnestedvalues(cs, true, out, valuetable)
510 |         local index = out.outindex
511 |         table.insert(out.out, index - k + 1, out.out[index])
512 |         out[index + 1] = nil
513 |         return k;
514 |     elseif type == Cruntime then
515 |         out.outindex = out.outindex + 1
516 |         out.out[out.outindex] = valuetable[cs.ocap[cs.cap].idx]
517 |         cs.cap = cs.cap + 1;
518 |         return 1;
519 |     elseif type == Cstring then
520 |         local b = {}
521 |         stringcap(cs, b, valuetable)
522 |         out.outindex = out.outindex + 1
523 |         out.out[out.outindex] = table.concat(b)
524 |         return 1;
525 |     elseif type == Csubst then
526 |         local b = {}
527 |         substcap(cs, b, valuetable);
528 |         out.outindex = out.outindex + 1
529 |         out.out[out.outindex] = table.concat(b)
530 |         return 1;
531 |     elseif type == Cgroup then
532 |         -- anonymous group?
533 |         if cs.ocap[cs.cap].idx == 0 then
534 |             return pushnestedvalues(cs, false, out, valuetable); -- add all nested values
535 |         else
536 |             -- named group: add no values
537 |             nextcap(cs); -- skip capture
538 |             return 0
539 |         end
540 |     elseif type == Cbackref then
541 |         return backrefcap(cs, out, valuetable)
542 |     elseif type == Ctable then
543 |         return tablecap(cs, out, valuetable)
544 |     elseif type == Cfunction then
545 |         return functioncap(cs, out, valuetable)
546 |     elseif type == Cnum then
547 |         return numcap(cs, out, valuetable)
548 |     elseif type == Cquery then
549 |         return querycap(cs, out, valuetable)
550 |     elseif type == Cfold then
551 |         return foldcap(cs, out, valuetable)
552 |     else
553 |         assert(false)
554 |     end
555 | end
556 | 
557 | 
558 | -- Prepare a CapState structure and traverse the entire list of
559 | -- captures in the stack pushing its results. 's' is the subject
560 | -- string, 'r' is the final position of the match, and 'ptop'
561 | -- the index in the stack where some useful values were pushed.
562 | -- Returns the number of results pushed. (If the list produces no
563 | -- results, push the final position of the match.)
564 | 
565 | local function getcaptures(capture, s, stream, r, valuetable, ...)
566 |     local n = 0;
567 |     local cs = { cap = 0 }
568 |     local out = { outindex = 0; out = {} }
569 |     -- is there any capture?
570 |     if capture[cs.cap].kind ~= Cclose then
571 |         cs.ocap = capture
572 |         cs.s = s;
573 |         cs.stream = stream
574 |         cs.ptopcount, cs.ptop = retcount(...)
575 |         repeat -- collect their values
576 |             n = n + pushcapture(cs, out, valuetable)
577 |         until cs.ocap[cs.cap].kind == Cclose
578 |     end
579 |     -- no capture values?
580 |     if n == 0 then
581 |         if not r then
582 |             return
583 |         else
584 |             return r
585 |         end
586 |     end
587 |     assert(out.outindex < 7998, "(too many captures)")
588 |     return unpack(out.out, 1, out.outindex)
589 | end
590 | 
591 | local function getcapturesruntime(capture, s, stream, notdelete, min, max, captop, valuetable, ...)
592 |     local n = 0;
593 |     local cs = { cap = min }
594 |     local out = { outindex = 0; out = {} }
595 |     cs.ocap = capture
596 |     cs.s = s
597 |     cs.stream = stream
598 |     cs.ptopcount, cs.ptop = retcount(...)
599 |     local start = 0
600 |     repeat -- collect their values
601 |         if not checknextcap(cs, max) then break end
602 |         local notdelete = notdelete or capture[cs.cap].kind == Cgroup and capture[cs.cap].idx ~= 0 and capture[cs.cap].candelete == 0
603 |         pushcapture(cs, out, valuetable)
604 |         if notdelete then
605 |             start = cs.cap
606 |         else
607 |             n = n + cs.cap - start
608 |             for i = 0, captop - cs.cap - 1 do
609 |                 ffi.copy(capture + start + i, capture + cs.cap + i, ffi.sizeof('CAPTURE'))
610 |             end
611 |             max = max - (cs.cap - start)
612 |             captop = captop - (cs.cap - start)
613 |             cs.cap = start
614 |         end
615 |     until cs.cap == max
616 |     assert(out.outindex < 7998, "(too many captures)")
617 |     return n, out.out, out.outindex
618 | end
619 | 
620 | return {
621 |     getcaptures = getcaptures,
622 |     runtimecap = runtimecap,
623 |     getcapturesruntime = getcapturesruntime,
624 | }
625 | 
626 | 


--------------------------------------------------------------------------------
/src/lpcode.lua:
--------------------------------------------------------------------------------
   1 | --[[
   2 | LPEGLJ
   3 | lpcode.lua
   4 | Generating code from tree
   5 | Copyright (C) 2014 Rostislav Sacek.
   6 | based on LPeg v1.0 - PEG pattern matching for Lua
   7 | Lua.org & PUC-Rio  written by Roberto Ierusalimschy
   8 | http://www.inf.puc-rio.br/~roberto/lpeg/
   9 | 
  10 | ** Permission is hereby granted, free of charge, to any person obtaining
  11 | ** a copy of this software and associated documentation files (the
  12 | ** "Software"), to deal in the Software without restriction, including
  13 | ** without limitation the rights to use, copy, modify, merge, publish,
  14 | ** distribute, sublicense, and/or sell copies of the Software, and to
  15 | ** permit persons to whom the Software is furnished to do so, subject to
  16 | ** the following conditions:
  17 | **
  18 | ** The above copyright notice and this permission notice shall be
  19 | ** included in all copies or substantial portions of the Software.
  20 | **
  21 | ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  22 | ** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  23 | ** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  24 | ** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  25 | ** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  26 | ** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  27 | ** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  28 | **
  29 | ** [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
  30 | --]]
  31 | local ffi = require "ffi"
  32 | require "lpvm"
  33 | 
  34 | local band, bor, bnot, rshift, lshift = bit.band, bit.bor, bit.bnot, bit.rshift, bit.lshift
  35 | 
  36 | local TChar = 0
  37 | local TSet = 1
  38 | local TAny = 2 -- standard PEG elements
  39 | local TTrue = 3
  40 | local TFalse = 4
  41 | local TRep = 5
  42 | local TSeq = 6
  43 | local TChoice = 7
  44 | local TNot = 8
  45 | local TAnd = 9
  46 | local TCall = 10
  47 | local TOpenCall = 11
  48 | local TRule = 12 -- sib1 is rule's pattern, sib2 is 'next' rule
  49 | local TGrammar = 13 -- sib1 is initial (and first) rule
  50 | local TBehind = 14 -- match behind
  51 | local TCapture = 15 -- regular capture
  52 | local TRunTime = 16 -- run-time capture
  53 | 
  54 | 
  55 | local IAny = 0 -- if no char, fail
  56 | local IChar = 1 -- if char != val, fail
  57 | local ISet = 2 -- if char not in val, fail
  58 | local ITestAny = 3 -- in no char, jump to 'offset'
  59 | local ITestChar = 4 -- if char != val, jump to 'offset'
  60 | local ITestSet = 5 -- if char not in val, jump to 'offset'
  61 | local ISpan = 6 -- read a span of chars in val
  62 | local IBehind = 7 -- walk back 'val' characters (fail if not possible)
  63 | local IRet = 8 -- return from a rule
  64 | local IEnd = 9 -- end of pattern
  65 | local IChoice = 10 -- stack a choice; next fail will jump to 'offset'
  66 | local IJmp = 11 -- jump to 'offset'
  67 | local ICall = 12 -- call rule at 'offset'
  68 | local IOpenCall = 13 -- call rule number 'offset' (must be closed to a ICall)
  69 | local ICommit = 14 -- pop choice and jump to 'offset'
  70 | local IPartialCommit = 15 -- update top choice to current position and jump
  71 | local IBackCommit = 16 -- "fails" but jump to its own 'offset'
  72 | local IFailTwice = 17 -- pop one choice and then fail
  73 | local IFail = 18 -- go back to saved state on choice and jump to saved offset
  74 | local IGiveup = 19 -- internal use
  75 | local IFullCapture = 20 -- complete capture of last 'off' chars
  76 | local IOpenCapture = 21 -- start a capture
  77 | local ICloseCapture = 22
  78 | local ICloseRunTime = 23
  79 | 
  80 | 
  81 | local Cclose = 0
  82 | local Cposition = 1
  83 | local Cconst = 2
  84 | local Cbackref = 3
  85 | local Carg = 4
  86 | local Csimple = 5
  87 | local Ctable = 6
  88 | local Cfunction = 7
  89 | local Cquery = 8
  90 | local Cstring = 9
  91 | local Cnum = 10
  92 | local Csubst = 11
  93 | local Cfold = 12
  94 | local Cruntime = 13
  95 | local Cgroup = 14
  96 | 
  97 | 
  98 | local PEnullable = 0
  99 | local PEnofail = 1
 100 | local RuleLR = 0x10000
 101 | local NOINST = -2
 102 | 
 103 | 
 104 | local MAXBEHINDPREDICATE = 255
 105 | local MAXRULES = 200
 106 | local MAXOFF = 0xF
 107 | 
 108 | -- number of siblings for each tree
 109 | local numsiblings = {
 110 |     0, 0, 0, -- char, set, any
 111 |     0, 0, -- true, false
 112 |     1, -- rep
 113 |     2, 2, -- seq, choice
 114 |     1, 1, -- not, and
 115 |     0, 0, 2, 1, -- call, opencall, rule, grammar
 116 |     1, -- behind
 117 |     1, 1 -- capture, runtime capture
 118 | }
 119 | 
 120 | 
 121 | local patternelement = ffi.typeof('PATTERN_ELEMENT')
 122 | local pattern = ffi.typeof('PATTERN')
 123 | local settype = ffi.typeof('int32_t[8]')
 124 | local fullset = settype(-1, -1, -1, -1, -1, -1, -1, -1)
 125 | 
 126 | -- {======================================================
 127 | -- Analysis and some optimizations
 128 | -- =======================================================
 129 | 
 130 | local codegen
 131 | 
 132 | 
 133 | -- Check whether a charset is empty (IFail), singleton (IChar),
 134 | -- full (IAny), or none of those (ISet).
 135 | 
 136 | local function charsettype(cs)
 137 |     local count = 0;
 138 |     local candidate = -1; -- candidate position for a char
 139 |     for i = 0, 8 - 1 do
 140 |         local b = cs[i];
 141 |         if b == 0 then
 142 |             if count > 1 then
 143 |                 return ISet; -- else set is still empty
 144 |             end
 145 |         elseif b == -1 then
 146 |             if count < (i * 32) then
 147 |                 return ISet;
 148 |             else
 149 |                 count = count + 32; -- set is still full
 150 |             end
 151 |             -- byte has only one bit?
 152 |         elseif band(b, (b - 1)) == 0 then
 153 |             if count > 0 then
 154 |                 return ISet; -- set is neither full nor empty
 155 |                 -- set has only one char till now; track it
 156 |             else
 157 |                 count = count + 1;
 158 |                 candidate = i;
 159 |             end
 160 |         else
 161 |             return ISet; -- byte is neither empty, full, nor singleton
 162 |         end
 163 |     end
 164 |     if count == 0 then
 165 |         return IFail, 0 -- empty set
 166 |         -- singleton; find character bit inside byte
 167 |     elseif count == 1 then
 168 |         local b = cs[candidate];
 169 |         local c = candidate * 32;
 170 |         for i = 1, 32 do
 171 |             if b == 1 then
 172 |                 c = c + i - 1
 173 |                 break
 174 |             end
 175 |             b = rshift(b, 1)
 176 |         end
 177 |         return IChar, c
 178 |     elseif count == 256 then
 179 |         return IAny, 0 -- full set
 180 |     else
 181 |         assert(false) -- should have returned by now
 182 |     end
 183 | end
 184 | 
 185 | 
 186 | -- A few basic operations on Charsets
 187 | 
 188 | local function cs_complement(cs)
 189 |     for i = 0, 8 - 1 do
 190 |         cs[i] = bnot(cs[i])
 191 |     end
 192 | end
 193 | 
 194 | 
 195 | local function cs_equal(cs1, cs2)
 196 |     for i = 0, 8 - 1 do
 197 |         if cs1[i] ~= cs2[i] then
 198 |             return
 199 |         end
 200 |     end
 201 |     return true
 202 | end
 203 | 
 204 | 
 205 | -- computes whether sets st1 and st2 are disjoint
 206 | 
 207 | local function cs_disjoint(st1, st2)
 208 |     for i = 0, 8 - 1 do
 209 |         if band(st1[i], st2[i]) ~= 0 then
 210 |             return
 211 |         end
 212 |     end
 213 |     return true
 214 | end
 215 | 
 216 | 
 217 | -- Convert a 'char' pattern (TSet, TChar, TAny) to a charset
 218 | 
 219 | local function tocharset(tree, index, valuetable)
 220 |     local val = settype()
 221 |     if tree.p[index].tag == TSet then
 222 |         ffi.copy(val, valuetable[tree.p[index].val], ffi.sizeof(val))
 223 |         return val
 224 |     elseif tree.p[index].tag == TChar then
 225 |         local b = tree.p[index].val
 226 |         -- only one char
 227 |         -- add that one
 228 |         val[rshift(b, 5)] = lshift(1, band(b, 31))
 229 |         return val
 230 |     elseif tree.p[index].tag == TAny then
 231 |         ffi.fill(val, ffi.sizeof(val), 0xff)
 232 |         return val
 233 |     end
 234 | end
 235 | 
 236 | 
 237 | -- checks whether a pattern has captures
 238 | 
 239 | local function hascaptures(tree, index)
 240 |     if tree.p[index].tag == TCapture or tree.p[index].tag == TRunTime then
 241 |         return true
 242 |     elseif tree.p[index].tag == TCall then
 243 |         return hascaptures(tree, index + tree.p[index].ps)
 244 |     else
 245 |         local ns = numsiblings[tree.p[index].tag + 1]
 246 |         if ns == 0 then
 247 |             return
 248 |         elseif ns == 1 then
 249 |             return hascaptures(tree, index + 1)
 250 |         elseif ns == 2 then
 251 |             if hascaptures(tree, index + 1) then
 252 |                 return true
 253 |             elseif tree.p[index].tag ~= TRule then
 254 |                 return hascaptures(tree, index + tree.p[index].ps)
 255 |             end
 256 |         else
 257 |             assert(false)
 258 |         end
 259 |     end
 260 | end
 261 | 
 262 | 
 263 | -- Checks how a pattern behaves regarding the empty string,
 264 | -- in one of two different ways:
 265 | -- A pattern is *nullable* if it can match without consuming any character;
 266 | -- A pattern is *nofail* if it never fails for any string
 267 | -- (including the empty string).
 268 | -- The difference is only for predicates; for patterns without
 269 | -- predicates, the two properties are equivalent.
 270 | -- (With predicates, &'a' is nullable but not nofail. Of course,
 271 | -- nofail => nullable.)
 272 | -- These functions are all convervative in the following way:
 273 | -- p is nullable => nullable(p)
 274 | -- nofail(p) => p cannot fail
 275 | -- (The function assumes that TOpenCall and TRunTime are not nullable:
 276 | -- TOpenCall must be checked again when the grammar is fixed;
 277 | -- TRunTime is an arbitrary choice.)
 278 | 
 279 | local function checkaux(tree, pred, index, lrcall)
 280 |     lrcall = lrcall or {}
 281 |     local tag = tree.p[index].tag
 282 |     if tag == TChar or tag == TSet or tag == TAny or
 283 |             tag == TFalse or tag == TOpenCall then
 284 |         return -- not nullable
 285 |     elseif tag == TRep or tag == TTrue then
 286 |         return true -- no fail
 287 |     elseif tag == TNot or tag == TBehind then
 288 |         -- can match empty, but may fail
 289 |         if pred == PEnofail then
 290 |             return
 291 |         else
 292 |             return true -- PEnullable
 293 |         end
 294 |     elseif tag == TAnd then
 295 |         -- can match empty; fail iff body does
 296 |         if pred == PEnullable then
 297 |             return true
 298 |         else
 299 |             return checkaux(tree, pred, index + 1, lrcall)
 300 |         end
 301 |         -- can fail; match empty iff body does
 302 |     elseif tag == TRunTime then
 303 |         if pred == PEnofail then
 304 |             return
 305 |         else
 306 |             return checkaux(tree, pred, index + 1, lrcall)
 307 |         end
 308 |     elseif tag == TSeq then
 309 |         if not checkaux(tree, pred, index + 1, lrcall) then
 310 |             return
 311 |         else
 312 |             return checkaux(tree, pred, index + tree.p[index].ps, lrcall)
 313 |         end
 314 |     elseif tag == TChoice then
 315 |         if checkaux(tree, pred, index + tree.p[index].ps, lrcall) then
 316 |             return true
 317 |         else
 318 |             return checkaux(tree, pred, index + 1, lrcall)
 319 |         end
 320 |     elseif tag == TCapture or tag == TGrammar or tag == TRule then
 321 |         return checkaux(tree, pred, index + 1, lrcall)
 322 |     elseif tag == TCall then
 323 |         --left recursive rule
 324 |         if bit.band(tree.p[index].cap, 0xffff) ~= 0 then
 325 |             local lr = index + tree.p[index].ps
 326 |             if lrcall[lr] then
 327 |                 return
 328 |             end
 329 |             lrcall[lr] = true
 330 |         end
 331 |         return checkaux(tree, pred, index + tree.p[index].ps, lrcall)
 332 |     else
 333 |         assert(false)
 334 |     end
 335 | end
 336 | 
 337 | 
 338 | -- number of characters to match a pattern (or -1 if variable)
 339 | -- ('count' avoids infinite loops for grammars)
 340 | 
 341 | local function fixedlenx(tree, count, len, index)
 342 |     local tag = tree.p[index].tag
 343 |     if tag == TChar or tag == TSet or tag == TAny then
 344 |         return len + 1;
 345 |     elseif tag == TFalse or tag == TTrue or tag == TNot or tag == TAnd or tag == TBehind then
 346 |         return len;
 347 |     elseif tag == TRep or tag == TRunTime or tag == TOpenCall then
 348 |         return -1;
 349 |     elseif tag == TCapture or tag == TRule or tag == TGrammar then
 350 |         return fixedlenx(tree, count, len, index + 1)
 351 |     elseif tag == TCall then
 352 |         if count >= MAXRULES then
 353 |             return -1; -- may be a loop
 354 |         else
 355 |             return fixedlenx(tree, count + 1, len, index + tree.p[index].ps)
 356 |         end
 357 |     elseif tag == TSeq then
 358 |         len = fixedlenx(tree, count, len, index + 1)
 359 |         if (len < 0) then
 360 |             return -1;
 361 |         else
 362 |             return fixedlenx(tree, count, len, index + tree.p[index].ps)
 363 |         end
 364 |     elseif tag == TChoice then
 365 |         local n1 = fixedlenx(tree, count, len, index + 1)
 366 |         if n1 < 0 then return -1 end
 367 |         local n2 = fixedlenx(tree, count, len, index + tree.p[index].ps)
 368 |         if n1 == n2 then
 369 |             return n1
 370 |         else
 371 |             return -1
 372 |         end
 373 |     else
 374 |         assert(false)
 375 |     end
 376 | end
 377 | 
 378 | 
 379 | -- Computes the 'first set' of a pattern.
 380 | -- The result is a conservative aproximation:
 381 | --   match p ax -> x' for some x ==> a in first(p).
 382 | --   match p '' -> ''            ==> returns 1.
 383 | -- The set 'follow' is the first set of what follows the
 384 | -- pattern (full set if nothing follows it)
 385 | 
 386 | local function getfirst(tree, follow, index, valuetable, lrcall)
 387 |     lrcall = lrcall or {}
 388 |     local tag = tree.p[index].tag
 389 |     if tag == TChar or tag == TSet or tag == TAny then
 390 |         local firstset = tocharset(tree, index, valuetable)
 391 |         return 0, firstset
 392 |     elseif tag == TTrue then
 393 |         local firstset = settype()
 394 |         ffi.copy(firstset, follow, ffi.sizeof(firstset))
 395 |         return 1, firstset
 396 |     elseif tag == TFalse then
 397 |         local firstset = settype()
 398 |         return 0, firstset
 399 |     elseif tag == TChoice then
 400 |         local e1, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall)
 401 |         local e2, csaux = getfirst(tree, follow, index + tree.p[index].ps, valuetable, lrcall)
 402 |         for i = 0, 8 - 1 do
 403 |             firstset[i] = bor(firstset[i], csaux[i])
 404 |         end
 405 |         return bor(e1, e2), firstset
 406 |     elseif tag == TSeq then
 407 |         if not checkaux(tree, PEnullable, index + 1) then
 408 |             return getfirst(tree, fullset, index + 1, valuetable, lrcall)
 409 |             -- FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl))
 410 |         else
 411 |             local e2, csaux = getfirst(tree, follow, index + tree.p[index].ps, valuetable, lrcall)
 412 |             local e1, firstset = getfirst(tree, csaux, index + 1, valuetable, lrcall)
 413 |             if e1 == 0 then -- 'e1' ensures that first can be used
 414 |             return 0, firstset
 415 |             -- one of the children has a matchtime?
 416 |             elseif band(bor(e1, e2), 2) == 2 then
 417 |                 return 2, firstset -- pattern has a matchtime capture
 418 |             else
 419 |                 return e2, firstset -- else depends on 'e2'
 420 |             end
 421 |         end
 422 |     elseif tag == TRep then
 423 |         local _, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall)
 424 |         for i = 0, 8 - 1 do
 425 |             firstset[i] = bor(firstset[i], follow[i])
 426 |         end
 427 |         return 1, firstset -- accept the empty string
 428 |     elseif tag == TCapture or tag == TGrammar or tag == TRule then
 429 |         return getfirst(tree, follow, index + 1, valuetable, lrcall)
 430 |         -- function invalidates any follow info.
 431 |     elseif tag == TRunTime then
 432 |         local e, firstset = getfirst(tree, fullset, index + 1, valuetable, lrcall)
 433 |         if e ~= 0 then
 434 |             return 2, firstset -- function is not "protected"?
 435 |         else
 436 |             return 0, firstset -- pattern inside capture ensures first can be used
 437 |         end
 438 |     elseif tag == TCall then
 439 |         -- left recursive rule
 440 |         if bit.band(tree.p[index].cap, 0xffff) ~= 0 then
 441 |             local lr = index + tree.p[index].ps
 442 |             if lrcall[lr] then
 443 |                 return 0, settype()
 444 |             else
 445 |                 lrcall[lr] = true
 446 |             end
 447 |         end
 448 |         return getfirst(tree, follow, index + tree.p[index].ps, valuetable, lrcall)
 449 |     elseif tag == TAnd then
 450 |         local e, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall)
 451 |         for i = 0, 8 - 1 do
 452 |             firstset[i] = band(firstset[i], follow[i])
 453 |         end
 454 |         return e, firstset
 455 |     elseif tag == TNot then
 456 |         local firstset = tocharset(tree, index + 1, valuetable)
 457 |         if firstset then
 458 |             cs_complement(firstset)
 459 |             return 1, firstset
 460 |         end
 461 |         local e, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall)
 462 |         ffi.copy(firstset, follow, ffi.sizeof(firstset))
 463 |         return bor(e, 1), firstset -- always can accept the empty string
 464 |         -- instruction gives no new information
 465 |     elseif tag == TBehind then
 466 |         -- call 'getfirst' to check for math-time captures
 467 |         local e, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall)
 468 |         ffi.copy(firstset, follow, ffi.sizeof(firstset))
 469 |         return bor(e, 1), firstset -- always can accept the empty string
 470 |     else
 471 |         assert(false)
 472 |     end
 473 | end
 474 | 
 475 | 
 476 | -- If it returns true, then pattern can fail only depending on the next
 477 | -- character of the subject
 478 | 
 479 | local function headfail(tree, index, lrcall)
 480 |     lrcall = lrcall or {}
 481 |     local tag = tree.p[index].tag
 482 |     if tag == TChar or tag == TSet or tag == TAny or tag == TFalse then
 483 |         return true
 484 |     elseif tag == TTrue or tag == TRep or tag == TRunTime or tag == TNot or tag == TBehind then
 485 |         return
 486 |     elseif tag == TCapture or tag == TGrammar or tag == TRule or tag == TAnd then
 487 |         return headfail(tree, index + 1, lrcall)
 488 |     elseif tag == TCall then
 489 |         -- left recursive rule
 490 |         if bit.band(tree.p[index].cap, 0xffff) ~= 0 then
 491 |             local lr = index + tree.p[index].ps
 492 |             if lrcall[lr] then
 493 |                 return true
 494 |             else
 495 |                 lrcall[lr] = true
 496 |             end
 497 |         end
 498 |         return headfail(tree, index + tree.p[index].ps, lrcall)
 499 |     elseif tag == TSeq then
 500 |         if not checkaux(tree, PEnofail, index + tree.p[index].ps) then
 501 |             return
 502 |         else
 503 |             return headfail(tree, index + 1, lrcall)
 504 |         end
 505 |     elseif tag == TChoice then
 506 |         if not headfail(tree, index + 1, lrcall) then
 507 |             return
 508 |         else
 509 |             return headfail(tree, index + tree.p[index].ps, lrcall)
 510 |         end
 511 |     else
 512 |         assert(false)
 513 |     end
 514 | end
 515 | 
 516 | 
 517 | -- Check whether the code generation for the given tree can benefit
 518 | -- from a follow set (to avoid computing the follow set when it is
 519 | -- not needed)
 520 | 
 521 | local function needfollow(tree, index)
 522 |     local tag = tree.p[index].tag
 523 |     if tag == TChar or tag == TSet or tag == TAny or tag == TFalse or tag == TTrue or tag == TAnd or tag == TNot or
 524 |             tag == TRunTime or tag == TGrammar or tag == TCall or tag == TBehind then
 525 |         return
 526 |     elseif tag == TChoice or tag == TRep then
 527 |         return true
 528 |     elseif tag == TCapture then
 529 |         return needfollow(tree, index + 1)
 530 |     elseif tag == TSeq then
 531 |         return needfollow(tree, index + tree.p[index].ps)
 532 |     else
 533 |         assert(false)
 534 |     end
 535 | end
 536 | 
 537 | -- ======================================================
 538 | 
 539 | 
 540 | -- {======================================================
 541 | -- Code generation
 542 | -- =======================================================
 543 | 
 544 | 
 545 | -- code generation is recursive; 'opt' indicates that the code is
 546 | -- being generated under a 'IChoice' operator jumping to its end.
 547 | -- 'tt' points to a previous test protecting this code. 'fl' is
 548 | -- the follow set of the pattern.
 549 | 
 550 | 
 551 | local function addinstruction(code, op, val)
 552 |     local size = code.size
 553 |     if size >= code.allocsize then
 554 |         code:doublesize()
 555 |     end
 556 |     code.p[size].code = op
 557 |     code.p[size].val = val
 558 |     code.size = size + 1
 559 |     return size
 560 | end
 561 | 
 562 | 
 563 | local function setoffset(code, instruction, offset)
 564 |     code.p[instruction].offset = offset;
 565 | end
 566 | 
 567 | 
 568 | -- Add a capture instruction:
 569 | -- 'op' is the capture instruction; 'cap' the capture kind;
 570 | -- 'key' the key into ktable; 'aux' is optional offset
 571 | 
 572 | local function addinstcap(code, op, cap, key, aux)
 573 |     local i = addinstruction(code, op, bor(cap, lshift(aux, 4)))
 574 |     setoffset(code, i, key)
 575 |     return i
 576 | end
 577 | 
 578 | 
 579 | local function jumptothere(code, instruction, target)
 580 |     if instruction >= 0 then
 581 |         setoffset(code, instruction, target - instruction)
 582 |     end
 583 | end
 584 | 
 585 | 
 586 | local function jumptohere(code, instruction)
 587 |     jumptothere(code, instruction, code.size)
 588 | end
 589 | 
 590 | 
 591 | -- Code an IChar instruction, or IAny if there is an equivalent
 592 | -- test dominating it
 593 | 
 594 | local function codechar(code, c, tt)
 595 |     assert(tt ~= -1)
 596 |     if tt >= 0 and code.p[tt].code == ITestChar and
 597 |             code.p[tt].val == c then
 598 |         addinstruction(code, IAny, 0)
 599 |     else
 600 |         addinstruction(code, IChar, c)
 601 |     end
 602 | end
 603 | 
 604 | 
 605 | -- Code an ISet instruction
 606 | 
 607 | local function coderealcharset(code, cs, valuetable)
 608 |     local ind = #valuetable + 1
 609 |     valuetable[ind] = cs
 610 |     return addinstruction(code, ISet, ind)
 611 | end
 612 | 
 613 | 
 614 | -- code a char set, optimizing unit sets for IChar, "complete"
 615 | -- sets for IAny, and empty sets for IFail; also use an IAny
 616 | -- when instruction is dominated by an equivalent test.
 617 | 
 618 | local function codecharset(code, cs, tt, valuetable)
 619 |     local op, c = charsettype(cs)
 620 |     if op == IChar then
 621 |         codechar(code, c, tt)
 622 |     elseif op == ISet then
 623 |         assert(tt ~= -1)
 624 |         if tt >= 0 and code.p[tt].code == ITestSet and
 625 |                 cs_equal(cs, valuetable[code.p[tt].val]) then
 626 |             addinstruction(code, IAny, 0)
 627 |         else
 628 |             coderealcharset(code, cs, valuetable)
 629 |         end
 630 |     else
 631 |         addinstruction(code, op, c)
 632 |     end
 633 | end
 634 | 
 635 | 
 636 | -- code a test set, optimizing unit sets for ITestChar, "complete"
 637 | -- sets for ITestAny, and empty sets for IJmp (always fails).
 638 | -- 'e' is true iff test should accept the empty string. (Test
 639 | -- instructions in the current VM never accept the empty string.)
 640 | 
 641 | local function codetestset(code, cs, e, valuetable)
 642 |     if e ~= 0 then
 643 |         return NOINST -- no test
 644 |     else
 645 |         local pos = code.size
 646 |         codecharset(code, cs, NOINST, valuetable)
 647 |         local inst = code.p[pos]
 648 |         local code = inst.code
 649 |         if code == IFail then
 650 |             inst.code = IJmp -- always jump
 651 |         elseif code == IAny then
 652 |             inst.code = ITestAny
 653 |         elseif code == IChar then
 654 |             inst.code = ITestChar
 655 |         elseif code == ISet then
 656 |             inst.code = ITestSet
 657 |         else
 658 |             assert(false)
 659 |         end
 660 |         return pos
 661 |     end
 662 | end
 663 | 
 664 | 
 665 | -- Find the final destination of a sequence of jumps
 666 | 
 667 | local function finaltarget(code, i)
 668 |     while code.p[i].code == IJmp do
 669 |         i = i + code.p[i].offset
 670 |     end
 671 |     return i
 672 | end
 673 | 
 674 | 
 675 | -- final label (after traversing any jumps)
 676 | 
 677 | local function finallabel(code, i)
 678 |     return finaltarget(code, i + code.p[i].offset)
 679 | end
 680 | 
 681 | -- <behind(p)> == behind n; <p>   (where n = fixedlen(p))
 682 | 
 683 | local function codebehind(code, tree, index, valuetable)
 684 |     if tree.p[index].val > 0 then
 685 |         addinstruction(code, IBehind, tree.p[index].val)
 686 |     end
 687 |     codegen(code, tree, fullset, false, NOINST, index + 1, valuetable) --  NOINST
 688 | end
 689 | 
 690 | 
 691 | -- Choice; optimizations:
 692 | -- - when p1 is headfail
 693 | -- - when first(p1) and first(p2) are disjoint; than
 694 | -- a character not in first(p1) cannot go to p1, and a character
 695 | -- in first(p1) cannot go to p2 (at it is not in first(p2)).
 696 | -- (The optimization is not valid if p1 accepts the empty string,
 697 | -- as then there is no character at all...)
 698 | -- - when p2 is empty and opt is true; a IPartialCommit can resuse
 699 | -- the Choice already active in the stack.
 700 | 
 701 | local function codechoice(code, tree, fl, opt, p1, p2, valuetable)
 702 |     local emptyp2 = tree.p[p2].tag == TTrue
 703 |     local e1, st1 = getfirst(tree, fullset, p1, valuetable)
 704 |     local _, st2 = getfirst(tree, fl, p2, valuetable)
 705 |     if headfail(tree, p1) or (e1 == 0 and cs_disjoint(st1, st2)) then
 706 |         -- <p1 / p2> == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2:
 707 |         local test = codetestset(code, st1, 0, valuetable)
 708 |         local jmp = NOINST;
 709 |         codegen(code, tree, fl, false, test, p1, valuetable)
 710 |         if not emptyp2 then
 711 |             jmp = addinstruction(code, IJmp, 0)
 712 |         end
 713 |         jumptohere(code, test)
 714 |         codegen(code, tree, fl, opt, NOINST, p2, valuetable)
 715 |         jumptohere(code, jmp)
 716 |     elseif opt and emptyp2 then
 717 |         -- p1? == IPartialCommit; p1
 718 |         jumptohere(code, addinstruction(code, IPartialCommit, 0))
 719 |         codegen(code, tree, fullset, true, NOINST, p1, valuetable)
 720 |     else
 721 |         -- <p1 / p2> ==
 722 |         --  test(fail(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2:
 723 |         local test = codetestset(code, st1, e1, valuetable)
 724 |         local pchoice = addinstruction(code, IChoice, 0)
 725 |         codegen(code, tree, fullset, emptyp2, test, p1, valuetable)
 726 |         local pcommit = addinstruction(code, ICommit, 0)
 727 |         jumptohere(code, pchoice)
 728 |         jumptohere(code, test)
 729 |         codegen(code, tree, fl, opt, NOINST, p2, valuetable)
 730 |         jumptohere(code, pcommit)
 731 |     end
 732 | end
 733 | 
 734 | 
 735 | -- And predicate
 736 | -- optimization: fixedlen(p) = n ==> <&p> == <p>; behind n
 737 | -- (valid only when 'p' has no captures)
 738 | 
 739 | local function codeand(code, tree, tt, index, valuetable)
 740 |     local n = fixedlenx(tree, 0, 0, index)
 741 |     if n >= 0 and n <= MAXBEHINDPREDICATE and not hascaptures(tree, index) then
 742 |         codegen(code, tree, fullset, false, tt, index, valuetable)
 743 |         if n > 0 then
 744 |             addinstruction(code, IBehind, n)
 745 |         end
 746 |     else
 747 |         -- default: Choice L1; p1; BackCommit L2; L1: Fail; L2:
 748 |         local pchoice = addinstruction(code, IChoice, 0)
 749 |         codegen(code, tree, fullset, false, tt, index, valuetable)
 750 |         local pcommit = addinstruction(code, IBackCommit, 0)
 751 |         jumptohere(code, pchoice)
 752 |         addinstruction(code, IFail, 0)
 753 |         jumptohere(code, pcommit)
 754 |     end
 755 | end
 756 | 
 757 | 
 758 | -- Captures: if pattern has fixed (and not too big) length, use
 759 | -- a single IFullCapture instruction after the match; otherwise,
 760 | -- enclose the pattern with OpenCapture - CloseCapture.
 761 | 
 762 | local function codecapture(code, tree, fl, tt, index, valuetable)
 763 |     local len = fixedlenx(tree, 0, 0, index + 1)
 764 |     if len >= 0 and len <= MAXOFF and not hascaptures(tree, index + 1) then
 765 |         codegen(code, tree, fl, false, tt, index + 1, valuetable)
 766 |         addinstcap(code, IFullCapture, tree.p[index].cap, tree.p[index].val, len)
 767 |     else
 768 |         addinstcap(code, IOpenCapture, tree.p[index].cap, tree.p[index].val, 0)
 769 |         codegen(code, tree, fl, false, tt, index + 1, valuetable)
 770 |         addinstcap(code, ICloseCapture, Cclose, 0, 0)
 771 |     end
 772 | end
 773 | 
 774 | 
 775 | local function coderuntime(code, tree, tt, index, valuetable)
 776 |     addinstcap(code, IOpenCapture, Cgroup, tree.p[index].val, 0)
 777 |     codegen(code, tree, fullset, false, tt, index + 1, valuetable)
 778 |     addinstcap(code, ICloseRunTime, Cclose, 0, 0)
 779 | end
 780 | 
 781 | 
 782 | -- Repetion; optimizations:
 783 | -- When pattern is a charset, can use special instruction ISpan.
 784 | -- When pattern is head fail, or if it starts with characters that
 785 | -- are disjoint from what follows the repetions, a simple test
 786 | -- is enough (a fail inside the repetition would backtrack to fail
 787 | -- again in the following pattern, so there is no need for a choice).
 788 | -- When 'opt' is true, the repetion can reuse the Choice already
 789 | -- active in the stack.
 790 | 
 791 | local function coderep(code, tree, opt, fl, index, valuetable)
 792 |     local st = tocharset(tree, index, valuetable)
 793 |     if st then
 794 |         local op = coderealcharset(code, st, valuetable)
 795 |         code.p[op].code = ISpan;
 796 |     else
 797 |         local e1, st = getfirst(tree, fullset, index, valuetable)
 798 |         if headfail(tree, index) or (e1 == 0 and cs_disjoint(st, fl)) then
 799 |             -- L1: test (fail(p1)) -> L2; <p>; jmp L1; L2:
 800 |             local test = codetestset(code, st, 0, valuetable)
 801 |             codegen(code, tree, fullset, false, test, index, valuetable)
 802 |             local jmp = addinstruction(code, IJmp, 0)
 803 |             jumptohere(code, test)
 804 |             jumptothere(code, jmp, test)
 805 |         else
 806 |             -- test(fail(p1)) -> L2; choice L2; L1: <p>; partialcommit L1; L2:
 807 |             -- or (if 'opt'): partialcommit L1; L1: <p>; partialcommit L1;
 808 |             local test = codetestset(code, st, e1, valuetable)
 809 |             local pchoice = NOINST;
 810 |             if opt then
 811 |                 jumptohere(code, addinstruction(code, IPartialCommit, 0))
 812 |             else
 813 |                 pchoice = addinstruction(code, IChoice, 0)
 814 |             end
 815 |             local l2 = code.size
 816 |             codegen(code, tree, fullset, false, NOINST, index, valuetable)
 817 |             local commit = addinstruction(code, IPartialCommit, 0)
 818 |             jumptothere(code, commit, l2)
 819 |             jumptohere(code, pchoice)
 820 |             jumptohere(code, test)
 821 |         end
 822 |     end
 823 | end
 824 | 
 825 | 
 826 | -- Not predicate; optimizations:
 827 | -- In any case, if first test fails, 'not' succeeds, so it can jump to
 828 | -- the end. If pattern is headfail, that is all (it cannot fail
 829 | -- in other parts); this case includes 'not' of simple sets. Otherwise,
 830 | -- use the default code (a choice plus a failtwice).
 831 | 
 832 | local function codenot(code, tree, index, valuetable)
 833 |     local e, st = getfirst(tree, fullset, index, valuetable)
 834 |     local test = codetestset(code, st, e, valuetable)
 835 |     -- test (fail(p1)) -> L1; fail; L1:
 836 |     if headfail(tree, index) then
 837 |         addinstruction(code, IFail, 0)
 838 |     else
 839 |         -- test(fail(p))-> L1; choice L1; <p>; failtwice; L1:
 840 |         local pchoice = addinstruction(code, IChoice, 0)
 841 |         codegen(code, tree, fullset, false, NOINST, index, valuetable)
 842 |         addinstruction(code, IFailTwice, 0)
 843 |         jumptohere(code, pchoice)
 844 |     end
 845 |     jumptohere(code, test)
 846 | end
 847 | 
 848 | 
 849 | -- change open calls to calls, using list 'positions' to find
 850 | -- correct offsets; also optimize tail calls
 851 | 
 852 | local function correctcalls(code, positions, from, to)
 853 |     for i = from, to - 1 do
 854 |         if code.p[i].code == IOpenCall then
 855 |             local n = code.p[i].offset; -- rule number
 856 |             local rule = positions[n]; -- rule position
 857 |             assert(rule == from or code.p[rule - 1].code == IRet)
 858 |             -- call; ret ?
 859 |             if bit.band(code.p[i].val, 0xffff) == 0 and code.p[finaltarget(code, i + 1)].code == IRet then
 860 |                 code.p[i].code = IJmp; -- tail call
 861 |             else
 862 |                 code.p[i].code = ICall;
 863 |             end
 864 |             jumptothere(code, i, rule) -- call jumps to respective rule
 865 |         end
 866 |     end
 867 | end
 868 | 
 869 | 
 870 | -- Code for a grammar:
 871 | -- call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2:
 872 | 
 873 | local function codegrammar(code, tree, index, valuetable)
 874 |     local positions = {}
 875 |     local rulenumber = 1;
 876 |     --    tree.p[rule].tag
 877 |     local rule = index + 1
 878 |     assert(tree.p[rule].tag == TRule)
 879 |     local LR = 0
 880 |     if band(RuleLR, tree.p[rule].cap) ~= 0 then LR = 1 end
 881 |     local firstcall = addinstruction(code, ICall, LR) -- call initial rule
 882 |     code.p[firstcall].aux = tree.p[rule].val
 883 |     local jumptoend = addinstruction(code, IJmp, 0) -- jump to the end
 884 |     jumptohere(code, firstcall) -- here starts the initial rule
 885 |     while tree.p[rule].tag == TRule do
 886 |         positions[rulenumber] = code.size -- save rule position
 887 |         rulenumber = rulenumber + 1
 888 |         codegen(code, tree, fullset, false, NOINST, rule + 1, valuetable) -- code rule
 889 |         addinstruction(code, IRet, 0)
 890 |         rule = rule + tree.p[rule].ps
 891 |     end
 892 |     assert(tree.p[rule].tag == TTrue)
 893 |     jumptohere(code, jumptoend)
 894 |     correctcalls(code, positions, firstcall + 2, code.size)
 895 | end
 896 | 
 897 | 
 898 | local function codecall(code, tree, index, val)
 899 |     local c = addinstruction(code, IOpenCall, tree.p[index].cap) -- to be corrected later
 900 |     code.p[c].aux = val
 901 |     assert(tree.p[index + tree.p[index].ps].tag == TRule)
 902 |     setoffset(code, c, band(tree.p[index + tree.p[index].ps].cap, 0x7fff)) -- offset = rule number
 903 | end
 904 | 
 905 | 
 906 | local function codeseq(code, tree, fl, opt, tt, p1, p2, valuetable)
 907 |     if needfollow(tree, p1) then
 908 |         local _, fll = getfirst(tree, fl, p2, valuetable) -- p1 follow is p2 first
 909 |         codegen(code, tree, fll, false, tt, p1, valuetable)
 910 |     else
 911 |         -- use 'fullset' as follow
 912 |         codegen(code, tree, fullset, false, tt, p1, valuetable)
 913 |     end
 914 |     -- can p1 consume anything?
 915 |     if (fixedlenx(tree, 0, 0, p1) ~= 0) then
 916 |         tt = NOINST; -- invalidate test
 917 |     end
 918 |     return codegen(code, tree, fl, opt, tt, p2, valuetable)
 919 | end
 920 | 
 921 | 
 922 | -- Main code-generation function: dispatch to auxiliar functions
 923 | -- according to kind of tree
 924 | 
 925 | -- code generation is recursive; 'opt' indicates that the code is being
 926 | -- generated as the last thing inside an optional pattern (so, if that
 927 | -- code is optional too, it can reuse the 'IChoice' already in place for
 928 | -- the outer pattern). 'tt' points to a previous test protecting this
 929 | -- code (or NOINST). 'fl' is the follow set of the pattern.
 930 | 
 931 | function codegen(code, tree, fl, opt, tt, index, valuetable)
 932 |     local tag = tree.p[index].tag
 933 |     if tag == TChar then
 934 |         return codechar(code, tree.p[index].val, tt)
 935 |     elseif tag == TAny then
 936 |         return addinstruction(code, IAny, 0)
 937 |     elseif tag == TSet then
 938 |         return codecharset(code, valuetable[tree.p[index].val], tt, valuetable)
 939 |     elseif tag == TTrue then
 940 |     elseif tag == TFalse then
 941 |         return addinstruction(code, IFail, 0)
 942 |     elseif tag == TSeq then
 943 |         return codeseq(code, tree, fl, opt, tt, index + 1, index + tree.p[index].ps, valuetable)
 944 |     elseif tag == TChoice then
 945 |         return codechoice(code, tree, fl, opt, index + 1, index + tree.p[index].ps, valuetable)
 946 |     elseif tag == TRep then
 947 |         return coderep(code, tree, opt, fl, index + 1, valuetable)
 948 |     elseif tag == TBehind then
 949 |         return codebehind(code, tree, index, valuetable)
 950 |     elseif tag == TNot then
 951 |         return codenot(code, tree, index + 1, valuetable)
 952 |     elseif tag == TAnd then
 953 |         return codeand(code, tree, tt, index + 1, valuetable)
 954 |     elseif tag == TCapture then
 955 |         return codecapture(code, tree, fl, tt, index, valuetable)
 956 |     elseif tag == TRunTime then
 957 |         return coderuntime(code, tree, tt, index, valuetable)
 958 |     elseif tag == TGrammar then
 959 |         return codegrammar(code, tree, index, valuetable)
 960 |     elseif tag == TCall then
 961 |         return codecall(code, tree, index, tree.p[index].val)
 962 |     else
 963 |         assert(false)
 964 |     end
 965 | end
 966 | 
 967 | 
 968 | -- Optimize jumps and other jump-like instructions.
 969 | -- * Update labels of instructions with labels to their final
 970 | -- destinations (e.g., choice L1; ... L1: jmp L2: becomes
 971 | -- choice L2)
 972 | -- * Jumps to other instructions that do jumps become those
 973 | -- instructions (e.g., jump to return becomes a return; jump
 974 | -- to commit becomes a commit)
 975 | 
 976 | local function peephole(code)
 977 |     local i = 0
 978 |     while i < code.size do
 979 |         local tag = code.p[i].code
 980 |         if tag == IChoice or tag == ICall or tag == ICommit or tag == IPartialCommit or
 981 |                 tag == IBackCommit or tag == ITestChar or tag == ITestSet or tag == ITestAny then
 982 |             -- instructions with labels
 983 |             jumptothere(code, i, finallabel(code, i)) -- optimize label
 984 | 
 985 |         elseif tag == IJmp then
 986 |             local ft = finaltarget(code, i)
 987 |             local tag = code.p[ft].code -- jumping to what?
 988 |             -- instructions with unconditional implicit jumps
 989 |             if tag == IRet or tag == IFail or tag == IFailTwice or tag == IEnd then
 990 |                 ffi.copy(code.p + i, code.p + ft, ffi.sizeof(patternelement)) -- jump becomes that instruction
 991 |             elseif tag == ICommit or tag == IPartialCommit or tag == IBackCommit then
 992 |                 -- inst. with unconditional explicit jumps
 993 |                 local fft = finallabel(code, ft)
 994 |                 ffi.copy(code.p + i, code.p + ft, ffi.sizeof(patternelement)) -- jump becomes that instruction...
 995 |                 jumptothere(code, i, fft) -- but must correct its offset
 996 |                 i = i - 1 -- reoptimize its label
 997 |             else
 998 |                 jumptothere(code, i, ft) -- optimize label
 999 |             end
1000 |         end
1001 |         i = i + 1
1002 |     end
1003 | end
1004 | 
1005 | 
1006 | -- Compile a pattern
1007 | 
1008 | local function compile(tree, index, valuetable)
1009 |     local code = pattern()
1010 |     codegen(code, tree, fullset, false, NOINST, index, valuetable)
1011 |     addinstruction(code, IEnd, 0)
1012 |     peephole(code)
1013 |     ffi.C.free(tree.code)
1014 |     tree.code = code
1015 | end
1016 | 
1017 | local function pat_new(ct, size)
1018 |     size = size or 0
1019 |     local allocsize = size
1020 |     if allocsize < 10 then
1021 |         allocsize = 10
1022 |     end
1023 |     local pat = ffi.cast('PATTERN*', ffi.C.malloc(ffi.sizeof(pattern)))
1024 |     assert(pat ~= nil)
1025 |     pat.allocsize = allocsize
1026 |     pat.size = size
1027 |     pat.p = ffi.C.malloc(ffi.sizeof(patternelement) * allocsize)
1028 |     assert(pat.p ~= nil)
1029 |     ffi.fill(pat.p, ffi.sizeof(patternelement) * allocsize)
1030 |     return pat
1031 | end
1032 | 
1033 | local function doublesize(ct)
1034 |     ct.p = ffi.C.realloc(ct.p, ffi.sizeof(patternelement) * ct.allocsize * 2)
1035 |     assert(ct.p ~= nil)
1036 |     ffi.fill(ct.p + ct.allocsize, ffi.sizeof(patternelement) * ct.allocsize)
1037 |     ct.allocsize = ct.allocsize * 2
1038 | end
1039 | 
1040 | local pattreg = {
1041 |     doublesize = doublesize,
1042 | }
1043 | 
1044 | local metareg = {
1045 |     ["__new"] = pat_new,
1046 |     ["__index"] = pattreg
1047 | }
1048 | 
1049 | ffi.metatype(pattern, metareg)
1050 | 
1051 | return {
1052 |     checkaux = checkaux,
1053 |     tocharset = tocharset,
1054 |     fixedlenx = fixedlenx,
1055 |     hascaptures = hascaptures,
1056 |     compile = compile,
1057 | }
1058 | 


--------------------------------------------------------------------------------
/src/lpeglj.lua:
--------------------------------------------------------------------------------
   1 | --[[
   2 | LPEGLJ
   3 | lpeglj.lua
   4 | Main module and tree generation
   5 | Copyright (C) 2014 Rostislav Sacek.
   6 | based on LPeg v1.0 - PEG pattern matching for Lua
   7 | Lua.org & PUC-Rio  written by Roberto Ierusalimschy
   8 | http://www.inf.puc-rio.br/~roberto/lpeg/
   9 | 
  10 | ** Permission is hereby granted, free of charge, to any person obtaining
  11 | ** a copy of this software and associated documentation files (the
  12 | ** "Software"), to deal in the Software without restriction, including
  13 | ** without limitation the rights to use, copy, modify, merge, publish,
  14 | ** distribute, sublicense, and/or sell copies of the Software, and to
  15 | ** permit persons to whom the Software is furnished to do so, subject to
  16 | ** the following conditions:
  17 | **
  18 | ** The above copyright notice and this permission notice shall be
  19 | ** included in all copies or substantial portions of the Software.
  20 | **
  21 | ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  22 | ** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  23 | ** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  24 | ** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  25 | ** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  26 | ** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  27 | ** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  28 | **
  29 | ** [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
  30 | --]]
  31 | 
  32 | assert(jit.version_num > 20000, "Use LuaJIT v2.0.1 or higher.")
  33 | 
  34 | local ffi = require "ffi"
  35 | local lpcode = require "lpcode"
  36 | local lpprint = require "lpprint"
  37 | local lpvm = require "lpvm"
  38 | 
  39 | local band, bor, bnot, rshift, lshift = bit.band, bit.bor, bit.bnot, bit.rshift, bit.lshift
  40 | 
  41 | ffi.cdef [[
  42 |  int isalnum(int c);
  43 |  int isalpha(int c);
  44 |  int iscntrl(int c);
  45 |  int isdigit(int c);
  46 |  int isgraph(int c);
  47 |  int islower(int c);
  48 |  int isprint(int c);
  49 |  int ispunct(int c);
  50 |  int isspace(int c);
  51 |  int isupper(int c);
  52 |  int isxdigit(int c);
  53 | ]]
  54 | 
  55 | local MAXBEHIND = 255
  56 | local MAXRULES = 200
  57 | local VERSION = "1.0.0.0LJ"
  58 | 
  59 | local TChar = 0
  60 | local TSet = 1
  61 | local TAny = 2 -- standard PEG elements
  62 | local TTrue = 3
  63 | local TFalse = 4
  64 | local TRep = 5
  65 | local TSeq = 6
  66 | local TChoice = 7
  67 | local TNot = 8
  68 | local TAnd = 9
  69 | local TCall = 10
  70 | local TOpenCall = 11
  71 | local TRule = 12 -- sib1 is rule's pattern, sib2 is 'next' rule
  72 | local TGrammar = 13 -- sib1 is initial (and first) rule
  73 | local TBehind = 14 -- match behind
  74 | local TCapture = 15 -- regular capture
  75 | local TRunTime = 16 -- run-time capture
  76 | 
  77 | local IAny = 0 -- if no char, fail
  78 | local IChar = 1 -- if char != val, fail
  79 | local ISet = 2 -- if char not in val, fail
  80 | local ITestAny = 3 -- in no char, jump to 'offset'
  81 | local ITestChar = 4 -- if char != val, jump to 'offset'
  82 | local ITestSet = 5 -- if char not in val, jump to 'offset'
  83 | local ISpan = 6 -- read a span of chars in val
  84 | local IBehind = 7 -- walk back 'val' characters (fail if not possible)
  85 | local IRet = 8 -- return from a rule
  86 | local IEnd = 9 -- end of pattern
  87 | local IChoice = 10 -- stack a choice; next fail will jump to 'offset'
  88 | local IJmp = 11 -- jump to 'offset'
  89 | local ICall = 12 -- call rule at 'offset'
  90 | local IOpenCall = 13 -- call rule number 'offset' (must be closed to a ICall)
  91 | local ICommit = 14 -- pop choice and jump to 'offset'
  92 | local IPartialCommit = 15 -- update top choice to current position and jump
  93 | local IBackCommit = 16 -- "fails" but jump to its own 'offset'
  94 | local IFailTwice = 17 -- pop one choice and then fail
  95 | local IFail = 18 -- go back to saved state on choice and jump to saved offset
  96 | local IGiveup = 19 -- internal use
  97 | local IFullCapture = 20 -- complete capture of last 'off' chars
  98 | local IOpenCapture = 21 -- start a capture
  99 | local ICloseCapture = 22
 100 | local ICloseRunTime = 23
 101 | 
 102 | local Cclose = 0
 103 | local Cposition = 1
 104 | local Cconst = 2
 105 | local Cbackref = 3
 106 | local Carg = 4
 107 | local Csimple = 5
 108 | local Ctable = 6
 109 | local Cfunction = 7
 110 | local Cquery = 8
 111 | local Cstring = 9
 112 | local Cnum = 10
 113 | local Csubst = 11
 114 | local Cfold = 12
 115 | local Cruntime = 13
 116 | local Cgroup = 14
 117 | 
 118 | local PEnullable = 0
 119 | local PEnofail = 1
 120 | local PEleftrecursion = 2
 121 | 
 122 | local newgrammar
 123 | 
 124 | local RuleLR = 0x10000
 125 | local Ruleused = 0x20000
 126 | local BCapcandelete = 0x30000
 127 | 
 128 | local LREnable = false
 129 | 
 130 | -- number of siblings for each tree
 131 | local numsiblings = {
 132 |     0, 0, 0, -- char, set, any
 133 |     0, 0, -- true, false
 134 |     1, -- rep
 135 |     2, 2, -- seq, choice
 136 |     1, 1, -- not, and
 137 |     0, 0, 2, 1, -- call, opencall, rule, grammar
 138 |     1, -- behind
 139 |     1, 1 -- capture, runtime capture
 140 | }
 141 | 
 142 | 
 143 | 
 144 | local patternid = 0
 145 | local valuetable = {}
 146 | 
 147 | local funcnames = setmetatable({}, { __mode = 'k' })
 148 | 
 149 | local treepatternelement = ffi.typeof('TREEPATTERN_ELEMENT')
 150 | local treepattern = ffi.typeof('TREEPATTERN')
 151 | local patternelement = ffi.typeof('PATTERN_ELEMENT')
 152 | local pattern = ffi.typeof('PATTERN')
 153 | local settype = ffi.typeof('int32_t[8]')
 154 | local uint32 = ffi.typeof('uint32_t[1]')
 155 | 
 156 | -- Fix a TOpenCall into a TCall node, using table 'postable' to
 157 | -- translate a key to its rule address in the tree. Raises an
 158 | -- error if key does not exist.
 159 | 
 160 | local function fixonecall(postable, grammar, index, valuetable)
 161 |     local name = valuetable[grammar.p[index].val] -- get rule's name
 162 |     local n = postable[name] -- query name in position table
 163 |     -- no position?
 164 |     if not n then
 165 |         error(("rule '%s' undefined in given grammar"):format(type(name) == 'table' and '(a table)' or name), 0)
 166 |     end
 167 |     grammar.p[index].tag = TCall;
 168 |     grammar.p[index].ps = n - index -- position relative to node
 169 |     grammar.p[index + grammar.p[index].ps].cap = bit.bor(grammar.p[index + grammar.p[index].ps].cap, Ruleused)
 170 | end
 171 | 
 172 | 
 173 | -- Transform left associative constructions into right
 174 | -- associative ones, for sequence and choice; that is:
 175 | -- (t11 + t12) + t2  =>  t11 + (t12 + t2)
 176 | -- (t11 * t12) * t2  =>  t11 * (t12 * t2)
 177 | -- (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2))
 178 | 
 179 | local function correctassociativity(tree, index)
 180 |     local t1 = index + 1
 181 |     assert(tree.p[index].tag == TChoice or tree.p[index].tag == TSeq)
 182 |     while tree.p[t1].tag == tree.p[index].tag do
 183 |         local n1size = tree.p[index].ps - 1; -- t1 == Op t11 t12
 184 |         local n11size = tree.p[t1].ps - 1;
 185 |         local n12size = n1size - n11size - 1
 186 |         for i = 1, n11size do
 187 |             ffi.copy(tree.p + index + i, tree.p + t1 + i, ffi.sizeof(treepatternelement))
 188 |         end
 189 |         tree.p[index].ps = n11size + 1
 190 |         tree.p[index + tree.p[index].ps].tag = tree.p[index].tag
 191 |         tree.p[index + tree.p[index].ps].ps = n12size + 1
 192 |     end
 193 | end
 194 | 
 195 | 
 196 | -- Make final adjustments in a tree. Fix open calls in tree,
 197 | -- making them refer to their respective rules or raising appropriate
 198 | -- errors (if not inside a grammar). Correct associativity of associative
 199 | -- constructions (making them right associative).
 200 | 
 201 | local function finalfix(fix, postable, grammar, index, valuetable)
 202 | 
 203 |     local tag = grammar.p[index].tag
 204 |     --subgrammars were already fixed
 205 |     if tag == TGrammar then
 206 |         return
 207 |     elseif tag == TOpenCall then
 208 |         -- inside a grammar?
 209 |         if fix then
 210 |             fixonecall(postable, grammar, index, valuetable)
 211 |             -- open call outside grammar
 212 |         else
 213 |             error(("rule '%s' used outside a grammar"):format(tostring(valuetable[grammar.p[index].val])), 0)
 214 |         end
 215 |     elseif tag == TSeq or tag == TChoice then
 216 |         correctassociativity(grammar, index)
 217 |     end
 218 |     local ns = numsiblings[tag + 1]
 219 |     if ns == 0 then
 220 |     elseif ns == 1 then
 221 |         return finalfix(fix, postable, grammar, index + 1, valuetable)
 222 |     elseif ns == 2 then
 223 |         finalfix(fix, postable, grammar, index + 1, valuetable)
 224 |         return finalfix(fix, postable, grammar, index + grammar.p[index].ps, valuetable)
 225 |     else
 226 |         assert(false)
 227 |     end
 228 | end
 229 | 
 230 | 
 231 | -- {======================================================
 232 | -- Tree generation
 233 | -- =======================================================
 234 | 
 235 | local function newcharset()
 236 |     local tree = treepattern(1)
 237 |     valuetable[tree.id] = { settype() }
 238 |     tree.p[0].tag = TSet
 239 |     tree.p[0].val = 1
 240 |     return tree, valuetable[tree.id][1]
 241 | end
 242 | 
 243 | 
 244 | -- add to tree a sequence where first sibling is 'sib' (with size
 245 | -- 'sibsize')
 246 | 
 247 | local function seqaux(tree, sib, start, sibsize)
 248 |     tree.p[start].tag = TSeq;
 249 |     tree.p[start].ps = sibsize + 1
 250 |     ffi.copy(tree.p + start + 1, sib.p, ffi.sizeof(treepatternelement) * sibsize)
 251 | end
 252 | 
 253 | 
 254 | -- Build a sequence of 'n' nodes, each with tag 'tag' and 'val' got
 255 | -- from the array 's' (or 0 if array is NULL). (TSeq is binary, so it
 256 | -- must build a sequence of sequence of sequence...)
 257 | 
 258 | local function fillseq(tree, tag, start, n, s)
 259 |     -- initial n-1 copies of Seq tag; Seq ...
 260 |     for i = 1, n - 1 do
 261 |         tree.p[start].tag = TSeq
 262 |         tree.p[start].ps = 2
 263 |         tree.p[start + 1].tag = tag
 264 |         if s then
 265 |             tree.p[start + 1].val = s:sub(i, i):byte()
 266 |         end
 267 |         start = start + tree.p[start].ps
 268 |     end
 269 |     tree.p[start].tag = tag -- last one does not need TSeq
 270 |     if s then
 271 |         tree.p[start].val = s:sub(n, n):byte()
 272 |     end
 273 | end
 274 | 
 275 | 
 276 | -- Numbers as patterns:
 277 | -- 0 == true (always match); n == TAny repeated 'n' times;
 278 | -- -n == not (TAny repeated 'n' times)
 279 | 
 280 | local function numtree(n)
 281 |     if n == 0 then
 282 |         local tree = treepattern(1)
 283 |         tree.p[0].tag = TTrue
 284 |         return tree
 285 |     else
 286 |         local tree, start
 287 |         if n > 0 then
 288 |             tree = treepattern(2 * n - 1)
 289 |             start = 0
 290 |             -- negative: code it as !(-n)
 291 |         else
 292 |             n = -n;
 293 |             tree = treepattern(2 * n)
 294 |             tree.p[0].tag = TNot
 295 |             start = 1
 296 |         end
 297 |         fillseq(tree, TAny, start, n) -- sequence of 'n' any's
 298 |         return tree;
 299 |     end
 300 | end
 301 | 
 302 | 
 303 | -- Convert value to a pattern
 304 | 
 305 | local function getpatt(val, name)
 306 |     local typ = type(val)
 307 |     if typ == 'string' then
 308 |         -- empty?
 309 |         if #val == 0 then
 310 |             local pat = treepattern(1)
 311 |             pat.p[0].tag = TTrue -- always match
 312 |             return pat
 313 |         else
 314 |             local tree = treepattern(2 * (#val - 1) + 1)
 315 |             fillseq(tree, TChar, 0, #val, val) -- sequence of '#val' chars
 316 |             return tree
 317 |         end
 318 |     elseif typ == 'number' then
 319 |         return numtree(val)
 320 |     elseif typ == 'boolean' then
 321 |         local pat = treepattern(1)
 322 |         pat.p[0].tag = val and TTrue or TFalse
 323 |         return pat
 324 |     elseif typ == 'table' then
 325 |         return newgrammar(val)
 326 |     elseif typ == 'function' then
 327 |         if name and type(name) == 'string' then
 328 |             funcnames[val] = name
 329 |         end
 330 |         local pat = treepattern(2)
 331 |         valuetable[pat.id] = { val }
 332 |         pat.p[0].tag = TRunTime
 333 |         pat.p[0].val = 1
 334 |         pat.p[1].tag = TTrue
 335 |         return pat
 336 |     elseif ffi.istype(treepattern, val) then
 337 |         assert(val.treesize > 0)
 338 |         return val
 339 |     end
 340 |     assert(false)
 341 | end
 342 | 
 343 | local function copykeys(ktable1, ktable2)
 344 |     local ktable, offset = {}, 0
 345 |     if not ktable1 and not ktable2 then
 346 |         return ktable, 0
 347 |     elseif ktable1 then
 348 |         for i = 1, #ktable1 do
 349 |             ktable[#ktable + 1] = ktable1[i]
 350 |         end
 351 |         offset = #ktable1
 352 |         if not ktable2 then
 353 |             return ktable, 0
 354 |         end
 355 |     end
 356 |     if ktable2 then
 357 |         for i = 1, #ktable2 do
 358 |             ktable[#ktable + 1] = ktable2[i]
 359 |         end
 360 |     end
 361 |     assert(#ktable < 65536, "too many Lua values in pattern")
 362 |     return ktable, offset
 363 | end
 364 | 
 365 | local function correctkeys(tree, index, offset)
 366 |     local tag = tree.p[index].tag
 367 |     if (tag == TSet or tag == TRule or tag == TCall or tag == TRunTime or tag == TOpenCall or tag == TCapture) and
 368 |             tree.p[index].val ~= 0 then
 369 |         tree.p[index].val = tree.p[index].val + offset
 370 |     end
 371 |     local ns = numsiblings[tag + 1]
 372 |     if ns == 0 then
 373 |     elseif ns == 1 then
 374 |         return correctkeys(tree, index + 1, offset)
 375 |     elseif ns == 2 then
 376 |         correctkeys(tree, index + 1, offset)
 377 |         return correctkeys(tree, index + tree.p[index].ps, offset)
 378 |     else
 379 |         assert(false)
 380 |     end
 381 | end
 382 | 
 383 | 
 384 | 
 385 | -- create a new tree, with a new root and one sibling.
 386 | 
 387 | local function newroot1sib(tag, pat)
 388 |     local tree1 = getpatt(pat)
 389 |     local tree = treepattern(1 + tree1.treesize) -- create new tree
 390 |     valuetable[tree.id] = copykeys(valuetable[tree1.id])
 391 |     tree.p[0].tag = tag
 392 |     ffi.copy(tree.p + 1, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize)
 393 |     return tree
 394 | end
 395 | 
 396 | 
 397 | -- create a new tree, with a new root and 2 siblings.
 398 | 
 399 | local function newroot2sib(tag, pat1, pat2)
 400 |     local tree1 = getpatt(pat1)
 401 |     local tree2 = getpatt(pat2)
 402 |     local tree = treepattern(1 + tree1.treesize + tree2.treesize) -- create new tree
 403 |     local ktable, offset = copykeys(valuetable[tree1.id], valuetable[tree2.id])
 404 |     valuetable[tree.id] = ktable
 405 |     tree.p[0].tag = tag
 406 |     tree.p[0].ps = 1 + tree1.treesize
 407 |     ffi.copy(tree.p + 1, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize)
 408 |     ffi.copy(tree.p + 1 + tree1.treesize, tree2.p, ffi.sizeof(treepatternelement) * tree2.treesize)
 409 |     if offset > 0 then
 410 |         correctkeys(tree, 1 + tree1.treesize, offset)
 411 |     end
 412 |     return tree;
 413 | end
 414 | 
 415 | 
 416 | local function lp_P(val, name)
 417 |     assert(type(val) ~= 'nil')
 418 |     return getpatt(val, name)
 419 | end
 420 | 
 421 | 
 422 | -- sequence operator; optimizations:
 423 | -- false x => false, x true => x, true x => x
 424 | -- (cannot do x . false => false because x may have runtime captures)
 425 | 
 426 | local function lp_seq(pat1, pat2)
 427 |     local tree1 = getpatt(pat1)
 428 |     local tree2 = getpatt(pat2)
 429 |     --  false . x == false, x . true = x
 430 |     if tree1.p[0].tag == TFalse or tree2.p[0].tag == TTrue then
 431 |         return tree1
 432 |         -- true . x = x
 433 |     elseif tree1.p[0].tag == TTrue then
 434 |         return tree2
 435 |     else
 436 |         return newroot2sib(TSeq, tree1, tree2)
 437 |     end
 438 | end
 439 | 
 440 | 
 441 | -- choice operator; optimizations:
 442 | -- charset / charset => charset
 443 | -- true / x => true, x / false => x, false / x => x
 444 | -- (x / true is not equivalent to true)
 445 | 
 446 | local function lp_choice(pat1, pat2)
 447 |     local tree1 = getpatt(pat1)
 448 |     local tree2 = getpatt(pat2)
 449 |     local charset1 = lpcode.tocharset(tree1, 0, valuetable[tree1.id])
 450 |     local charset2 = lpcode.tocharset(tree2, 0, valuetable[tree2.id])
 451 |     if charset1 and charset2 then
 452 |         local t, set = newcharset()
 453 |         for i = 0, 7 do
 454 |             set[i] = bor(charset1[i], charset2[i])
 455 |         end
 456 |         return t
 457 |     elseif lpcode.checkaux(tree1, PEnofail, 0) or tree2.p[0].tag == TFalse then
 458 |         return tree1 -- true / x => true, x / false => x
 459 |     elseif tree1.p[0].tag == TFalse then
 460 |         return tree2 -- false / x => x
 461 |     else
 462 |         return newroot2sib(TChoice, tree1, tree2)
 463 |     end
 464 | end
 465 | 
 466 | 
 467 | -- p^n
 468 | 
 469 | local function lp_star(tree1, n)
 470 |     local tree
 471 |     n = tonumber(n)
 472 |     assert(type(n) == 'number')
 473 |     -- seq tree1 (seq tree1 ... (seq tree1 (rep tree1)))
 474 |     if n >= 0 then
 475 |         tree = treepattern((n + 1) * (tree1.treesize + 1))
 476 |         if lpcode.checkaux(tree1, PEnullable, 0) then
 477 |             error("loop body may accept empty string", 0)
 478 |         end
 479 |         valuetable[tree.id] = copykeys(valuetable[tree1.id])
 480 |         local start = 0
 481 |         -- repeat 'n' times
 482 |         for i = 1, n do
 483 |             seqaux(tree, tree1, start, tree1.treesize)
 484 |             start = start + tree.p[start].ps
 485 |         end
 486 |         tree.p[start].tag = TRep
 487 |         ffi.copy(tree.p + start + 1, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize)
 488 |         -- choice (seq tree1 ... choice tree1 true ...) true
 489 |     else
 490 |         n = -n;
 491 |         -- size = (choice + seq + tree1 + true) * n, but the last has no seq
 492 |         tree = treepattern(n * (tree1.treesize + 3) - 1)
 493 |         valuetable[tree.id] = copykeys(valuetable[tree1.id])
 494 |         local start = 0
 495 |         -- repeat (n - 1) times
 496 |         for i = n, 2, -1 do
 497 |             tree.p[start].tag = TChoice;
 498 |             tree.p[start].ps = i * (tree1.treesize + 3) - 2
 499 |             tree.p[start + tree.p[start].ps].tag = TTrue;
 500 |             start = start + 1
 501 |             seqaux(tree, tree1, start, tree1.treesize)
 502 |             start = start + tree.p[start].ps
 503 |         end
 504 |         tree.p[start].tag = TChoice;
 505 |         tree.p[start].ps = tree1.treesize + 1
 506 |         tree.p[start + tree.p[start].ps].tag = TTrue
 507 |         ffi.copy(tree.p + start + 1, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize)
 508 |     end
 509 |     return tree
 510 | end
 511 | 
 512 | 
 513 | -- #p == &p
 514 | 
 515 | local function lp_and(pat)
 516 |     return newroot1sib(TAnd, pat)
 517 | end
 518 | 
 519 | 
 520 | -- -p == !p
 521 | 
 522 | local function lp_not(pat)
 523 |     return newroot1sib(TNot, pat)
 524 | end
 525 | 
 526 | 
 527 | -- [t1 - t2] == Seq (Not t2) t1
 528 | -- If t1 and t2 are charsets, make their difference.
 529 | 
 530 | local function lp_sub(pat1, pat2)
 531 |     local tree1 = getpatt(pat1)
 532 |     local tree2 = getpatt(pat2)
 533 |     local charset1 = lpcode.tocharset(tree1, 0, valuetable[tree1.id])
 534 |     local charset2 = lpcode.tocharset(tree2, 0, valuetable[tree2.id])
 535 |     if charset1 and charset2 then
 536 |         local tree, set = newcharset()
 537 |         for i = 0, 7 do
 538 |             set[i] = band(charset1[i], bnot(charset2[i]))
 539 |         end
 540 |         return tree
 541 |     else
 542 |         local tree = treepattern(2 + tree1.treesize + tree2.treesize)
 543 |         local ktable, offset = copykeys(valuetable[tree2.id], valuetable[tree1.id])
 544 |         valuetable[tree.id] = ktable
 545 |         tree.p[0].tag = TSeq; -- sequence of...
 546 |         tree.p[0].ps = 2 + tree2.treesize
 547 |         tree.p[1].tag = TNot; -- ...not...
 548 |         ffi.copy(tree.p + 2, tree2.p, ffi.sizeof(treepatternelement) * tree2.treesize)
 549 |         ffi.copy(tree.p + tree2.treesize + 2, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize)
 550 |         if offset > 0 then
 551 |             correctkeys(tree, 2 + tree2.treesize, offset)
 552 |         end
 553 |         return tree
 554 |     end
 555 | end
 556 | 
 557 | 
 558 | local function lp_set(val)
 559 |     assert(type(val) == 'string')
 560 |     local tree, set = newcharset()
 561 |     for i = 1, #val do
 562 |         local b = val:sub(i, i):byte()
 563 |         set[rshift(b, 5)] = bor(set[rshift(b, 5)], lshift(1, band(b, 31)))
 564 |     end
 565 |     return tree
 566 | end
 567 | 
 568 | 
 569 | local function lp_range(...)
 570 |     local args = { ... }
 571 |     local top = #args
 572 |     local tree, set = newcharset()
 573 |     for i = 1, top do
 574 |         assert(#args[i] == 2, args[i] .. " range must have two characters")
 575 |         for b = args[i]:sub(1, 1):byte(), args[i]:sub(2, 2):byte() do
 576 |             set[rshift(b, 5)] = bor(set[rshift(b, 5)], lshift(1, band(b, 31)))
 577 |         end
 578 |     end
 579 |     return tree
 580 | end
 581 | 
 582 | 
 583 | -- Look-behind predicate
 584 | 
 585 | local function lp_behind(pat)
 586 |     local tree1 = getpatt(pat)
 587 |     local n = lpcode.fixedlenx(tree1, 0, 0, 0)
 588 |     assert(not lpcode.hascaptures(tree1, 0), "pattern have captures")
 589 |     assert(n >= 0, "pattern may not have fixed length")
 590 |     assert(n <= MAXBEHIND, "pattern too long to look behind")
 591 |     local tree = newroot1sib(TBehind, pat)
 592 |     tree.p[0].val = n;
 593 |     return tree
 594 | end
 595 | 
 596 | 
 597 | -- Create a non-terminal
 598 | 
 599 | local function lp_V(val, p)
 600 |     assert(val, "non-nil value expected")
 601 |     local tree = treepattern(1)
 602 |     valuetable[tree.id] = { val }
 603 |     tree.p[0].tag = TOpenCall
 604 |     tree.p[0].val = 1
 605 |     tree.p[0].cap = p or 0
 606 |     return tree
 607 | end
 608 | 
 609 | 
 610 | -- Create a tree for a non-empty capture, with a body and
 611 | -- optionally with an associated value
 612 | 
 613 | local function capture_aux(cap, pat, val)
 614 |     local tree = newroot1sib(TCapture, pat)
 615 |     tree.p[0].cap = cap
 616 |     if val then
 617 |         local ind = #valuetable[tree.id] + 1
 618 |         assert(ind <= 65536, "too many Lua values in pattern" .. ind)
 619 |         valuetable[tree.id][ind] = val
 620 |         tree.p[0].val = ind
 621 |     end
 622 |     return tree
 623 | end
 624 | 
 625 | 
 626 | -- Fill a tree with an empty capture, using an empty (TTrue) sibling.
 627 | 
 628 | local function auxemptycap(tree, cap, par, start)
 629 |     tree.p[start].tag = TCapture;
 630 |     tree.p[start].cap = cap
 631 |     if type(par) ~= 'nil' then
 632 |         local ind = #valuetable[tree.id] + 1
 633 |         assert(ind <= 65536, "too many Lua values in pattern")
 634 |         valuetable[tree.id][ind] = par
 635 |         tree.p[start].val = ind
 636 |     end
 637 |     tree.p[start + 1].tag = TTrue;
 638 | end
 639 | 
 640 | 
 641 | -- Create a tree for an empty capture
 642 | 
 643 | local function newemptycap(cap, par)
 644 |     local tree = treepattern(2)
 645 |     if type(par) ~= 'nil' then valuetable[tree.id] = {} end
 646 |     auxemptycap(tree, cap, par, 0)
 647 |     return tree
 648 | end
 649 | 
 650 | 
 651 | -- Captures with syntax p / v
 652 | -- (function capture, query capture, string capture, or number capture)
 653 | 
 654 | local function lp_divcapture(pat, par, xxx)
 655 |     local typ = type(par)
 656 |     if typ == "function" then
 657 |         return capture_aux(Cfunction, pat, par)
 658 |     elseif typ == "table" then
 659 |         return capture_aux(Cquery, pat, par)
 660 |     elseif typ == "string" then
 661 |         return capture_aux(Cstring, pat, par)
 662 |     elseif typ == "number" then
 663 |         local tree = newroot1sib(TCapture, pat)
 664 |         assert(0 <= par and par <= 0xffff, "invalid number")
 665 |         tree.p[0].cap = Cnum;
 666 |         local ind = #valuetable[tree.id] + 1
 667 |         assert(ind <= 65536, "too many Lua values in pattern")
 668 |         valuetable[tree.id][ind] = par
 669 |         tree.p[0].val = ind
 670 |         return tree
 671 |     else
 672 |         error("invalid replacement value", 0)
 673 |     end
 674 | end
 675 | 
 676 | 
 677 | local function lp_substcapture(pat)
 678 |     return capture_aux(Csubst, pat)
 679 | end
 680 | 
 681 | 
 682 | local function lp_tablecapture(pat)
 683 |     return capture_aux(Ctable, pat, 0)
 684 | end
 685 | 
 686 | 
 687 | local function lp_groupcapture(pat, val)
 688 |     if not val then
 689 |         return capture_aux(Cgroup, pat)
 690 |     else
 691 |         return capture_aux(Cgroup, pat, val)
 692 |     end
 693 | end
 694 | 
 695 | 
 696 | local function lp_foldcapture(pat, fce)
 697 |     assert(type(fce) == 'function')
 698 |     return capture_aux(Cfold, pat, fce)
 699 | end
 700 | 
 701 | 
 702 | local function lp_simplecapture(pat)
 703 |     return capture_aux(Csimple, pat)
 704 | end
 705 | 
 706 | 
 707 | local function lp_poscapture()
 708 |     return newemptycap(Cposition)
 709 | end
 710 | 
 711 | 
 712 | local function lp_argcapture(val)
 713 |     assert(type(val) == 'number')
 714 |     local tree = newemptycap(Carg, 0)
 715 |     local ind = #valuetable[tree.id] + 1
 716 |     assert(ind <= 65536, "too many Lua values in pattern")
 717 |     valuetable[tree.id][ind] = val
 718 |     tree.p[0].val = ind
 719 |     assert(0 < val and val <= 0xffff, "invalid argument index")
 720 |     return tree
 721 | end
 722 | 
 723 | 
 724 | local function lp_backref(val)
 725 |     return newemptycap(Cbackref, val)
 726 | end
 727 | 
 728 | 
 729 | -- Constant capture
 730 | 
 731 | local function lp_constcapture(...)
 732 |     local tree
 733 |     local args = { ... }
 734 |     local n = select('#', ...) -- number of values
 735 |     -- no values?
 736 |     if n == 0 then
 737 |         tree = treepattern(1) -- no capture
 738 |         tree.p[0].tag = TTrue
 739 |     elseif n == 1 then
 740 |         tree = newemptycap(Cconst, args[1]) -- single constant capture
 741 |         -- create a group capture with all values
 742 |     else
 743 |         tree = treepattern(3 + 3 * (n - 1))
 744 |         valuetable[tree.id] = {}
 745 |         tree.p[0].tag = TCapture
 746 |         tree.p[0].cap = Cgroup
 747 |         local start = 1
 748 |         for i = 1, n - 1 do
 749 |             tree.p[start].tag = TSeq
 750 |             tree.p[start].ps = 3
 751 |             auxemptycap(tree, Cconst, args[i], start + 1)
 752 |             start = start + tree.p[start].ps
 753 |         end
 754 |         auxemptycap(tree, Cconst, args[n], start)
 755 |     end
 756 |     return tree
 757 | end
 758 | 
 759 | 
 760 | local function lp_matchtime(pat, fce, name)
 761 |     assert(type(fce) == 'function')
 762 |     if name and type(name) == 'string' then
 763 |         funcnames[fce] = name
 764 |     end
 765 |     local tree = newroot1sib(TRunTime, pat)
 766 |     local ind = #valuetable[tree.id] + 1
 767 |     assert(ind <= 65536, "too many Lua values in pattern")
 768 |     valuetable[tree.id][ind] = fce
 769 |     tree.p[0].val = ind
 770 |     return tree
 771 | end
 772 | 
 773 | -- ======================================================
 774 | 
 775 | 
 776 | 
 777 | -- ======================================================
 778 | -- Grammar - Tree generation
 779 | -- =======================================================
 780 | 
 781 | 
 782 | -- return index and the pattern for the
 783 | -- initial rule of grammar;
 784 | -- also add that index into position table.
 785 | 
 786 | local function getfirstrule(pat, postab)
 787 |     local key
 788 |     -- access first element
 789 |     if type(pat[1]) == 'string' then
 790 |         key = pat[1]
 791 |     else
 792 |         key = 1
 793 |     end
 794 |     local rule = pat[key]
 795 |     if not rule then
 796 |         error("grammar has no initial rule", 0)
 797 |     end
 798 |     -- initial rule not a pattern?
 799 |     if not ffi.istype(treepattern, rule) then
 800 |         error(("initial rule '%s' is not a pattern"):format(tostring(key)), 0)
 801 |     end
 802 |     postab[key] = 1
 803 |     return key, rule
 804 | end
 805 | 
 806 | 
 807 | -- traverse grammar, collect  all its keys and patterns
 808 | -- into rule table. Create a new table (before all pairs key-pattern) to
 809 | -- collect all keys and their associated positions in the final tree
 810 | -- (the "position table").
 811 | -- Return the number of rules and the total size
 812 | -- for the new tree.
 813 | 
 814 | local function collectrules(pat)
 815 |     local n = 1; -- to count number of rules
 816 |     local postab = {}
 817 |     local firstkeyrule, firstrule = getfirstrule(pat, postab)
 818 |     local rules = { firstkeyrule, firstrule }
 819 |     local size = 2 + firstrule.treesize -- TGrammar + TRule + rule
 820 |     for key, val in pairs(pat) do
 821 |         -- initial rule?
 822 |         if key ~= 1 and tostring(val) ~= tostring(firstrule) then
 823 |             -- value is not a pattern?
 824 |             if not ffi.istype(treepattern, val) then
 825 |                 error(("rule '%s' is not a pattern"):format(tostring(key)), 0)
 826 |             end
 827 |             rules[#rules + 1] = key
 828 |             rules[#rules + 1] = val
 829 |             postab[key] = size
 830 |             size = 1 + size + val.treesize
 831 |             n = n + 1
 832 |         end
 833 |     end
 834 |     size = size + 1; -- TTrue to finish list of rules
 835 |     return n, size, rules, postab
 836 | end
 837 | 
 838 | 
 839 | local function buildgrammar(grammar, rules, n, index, valuetable)
 840 |     local ktable, offset = {}, 0
 841 |     -- add each rule into new tree
 842 |     for i = 1, n do
 843 |         local size = rules[i * 2].treesize
 844 |         grammar.p[index].tag = TRule;
 845 |         grammar.p[index].cap = i; -- rule number
 846 |         grammar.p[index].ps = size + 1; -- point to next rule
 847 |         local ind = #ktable + 1
 848 |         ktable[ind] = rules[i * 2 - 1]
 849 |         grammar.p[index].val = ind
 850 |         ffi.copy(grammar.p + index + 1, rules[i * 2].p, ffi.sizeof(treepatternelement) * size) -- copy rule
 851 |         ktable, offset = copykeys(ktable, valuetable[rules[i * 2].id])
 852 |         if offset > 0 then
 853 |             correctkeys(grammar, index + 1, offset)
 854 |         end
 855 |         index = index + grammar.p[index].ps; -- move to next rule
 856 |     end
 857 |     grammar.p[index].tag = TTrue; -- finish list of rules
 858 |     return ktable
 859 | end
 860 | 
 861 | 
 862 | -- Check whether a tree has potential infinite loops
 863 | 
 864 | local function checkloops(tree, index)
 865 |     local tag = tree.p[index].tag
 866 |     if tag == TRep and lpcode.checkaux(tree, PEnullable, index + 1) then
 867 |         return true
 868 |     elseif tag == TGrammar then
 869 |         return -- sub-grammars already checked
 870 |     else
 871 |         local tag = numsiblings[tree.p[index].tag + 1]
 872 |         if tag == 0 then
 873 |             return
 874 |         elseif tag == 1 then
 875 |             return checkloops(tree, index + 1)
 876 |         elseif tag == 2 then
 877 |             if checkloops(tree, index + 1) then
 878 |                 return true
 879 |             else
 880 |                 return checkloops(tree, index + tree.p[index].ps)
 881 |             end
 882 |         else
 883 |             assert(false)
 884 |         end
 885 |     end
 886 | end
 887 | 
 888 | -- Check whether a rule can be left recursive; returns PEleftrecursion in that
 889 | -- case; otherwise return 1 iff pattern is nullable.
 890 | 
 891 | local function verifyrule(rulename, tree, passed, nullable, index, valuetable)
 892 |     local tag = tree.p[index].tag
 893 |     if tag == TChar or tag == TSet or tag == TAny or tag == TFalse then
 894 |         return nullable; -- cannot pass from here
 895 |     elseif tag == TTrue or tag == TBehind then
 896 |         return true;
 897 |     elseif tag == TNot or tag == TAnd or tag == TRep then
 898 |         return verifyrule(rulename, tree, passed, true, index + 1, valuetable)
 899 |     elseif tag == TCapture or tag == TRunTime then
 900 |         return verifyrule(rulename, tree, passed, nullable, index + 1, valuetable)
 901 |     elseif tag == TCall then
 902 |         local rule = valuetable[tree.p[index].val]
 903 |         if rule == rulename then return PEleftrecursion end
 904 |         if passed[rule] and passed[rule] > MAXRULES then
 905 |             return nullable
 906 |         end
 907 |         return verifyrule(rulename, tree, passed, nullable, index + tree.p[index].ps, valuetable)
 908 |         -- only check 2nd child if first is nullable
 909 |     elseif tag == TSeq then
 910 |         local res = verifyrule(rulename, tree, passed, false, index + 1, valuetable)
 911 |         if res == PEleftrecursion then
 912 |             return res
 913 |         elseif not res then
 914 |             return nullable
 915 |         else
 916 |             return verifyrule(rulename, tree, passed, nullable, index + tree.p[index].ps, valuetable)
 917 |         end
 918 |         -- must check both children
 919 |     elseif tag == TChoice then
 920 |         nullable = verifyrule(rulename, tree, passed, nullable, index + 1, valuetable)
 921 |         if nullable == PEleftrecursion then return nullable end
 922 |         return verifyrule(rulename, tree, passed, nullable, index + tree.p[index].ps, valuetable)
 923 |     elseif tag == TRule then
 924 |         local rule = valuetable[tree.p[index].val]
 925 |         passed[rule] = (passed[rule] or 0) + 1
 926 |         return verifyrule(rulename, tree, passed, nullable, index + 1, valuetable)
 927 |     elseif tag == TGrammar then
 928 |         return lpcode.checkaux(tree, PEnullable, index) -- sub-grammar cannot be left recursive
 929 |     else
 930 |         assert(false)
 931 |     end
 932 | end
 933 | 
 934 | 
 935 | local function verifygrammar(rule, index, valuetable)
 936 |     -- check left-recursive rules
 937 |     local LR = {}
 938 |     local ind = index + 1
 939 |     while rule.p[ind].tag == TRule do
 940 |         local rulename = valuetable[rule.p[ind].val]
 941 |         -- used rule
 942 |         if rulename then
 943 |             if verifyrule(rulename, rule, {}, false, ind + 1, valuetable) == PEleftrecursion then
 944 |                 if not LREnable then
 945 |                     error(("rule '%s' may be left recursive"):format(rulename), 0)
 946 |                 end
 947 |                 LR[rulename] = true
 948 |             end
 949 |         end
 950 |         ind = ind + rule.p[ind].ps
 951 |     end
 952 |     assert(rule.p[ind].tag == TTrue)
 953 | 
 954 |     for i = 0, rule.treesize - 1 do
 955 |         if rule.p[i].tag == TRule and LR[valuetable[rule.p[i].val]] then
 956 |             rule.p[i].cap = bor(rule.p[i].cap, RuleLR) --TRule can be left recursive
 957 |         end
 958 |         if rule.p[i].tag == TCall and LR[valuetable[rule.p[i].val]] then
 959 |             if rule.p[i].cap == 0 then
 960 |                 rule.p[i].cap = 1 --TCall can be left recursive
 961 |             end
 962 |         end
 963 |     end
 964 | 
 965 |     -- check infinite loops inside rules
 966 |     ind = index + 1
 967 |     while rule.p[ind].tag == TRule do
 968 |         -- used rule
 969 |         if rule.p[ind].val then
 970 |             if checkloops(rule, ind + 1) then
 971 |                 error(("empty loop in rule '%s'"):format(tostring(valuetable[rule.p[ind].val])), 0)
 972 |             end
 973 |         end
 974 |         ind = ind + rule.p[ind].ps
 975 |     end
 976 |     assert(rule.p[ind].tag == TTrue)
 977 | end
 978 | 
 979 | 
 980 | -- Give a name for the initial rule if it is not referenced
 981 | 
 982 | local function initialrulename(grammar, val, valuetable)
 983 |     grammar.p[1].cap = bit.bor(grammar.p[1].cap, Ruleused)
 984 |     -- initial rule is not referenced?
 985 |     if grammar.p[1].val == 0 then
 986 |         local ind = #valuetable + 1
 987 |         assert(ind <= 65536, "too many Lua values in pattern")
 988 |         valuetable[ind] = val
 989 |         grammar.p[1].val = ind
 990 |     end
 991 | end
 992 | 
 993 | 
 994 | function newgrammar(pat)
 995 |     -- traverse grammar. Create a new table (before all pairs key-pattern) to
 996 |     -- collect all keys and their associated positions in the final tree
 997 |     -- (the "position table").
 998 |     -- Return new tree.
 999 | 
1000 |     local n, size, rules, postab = collectrules(pat)
1001 |     local grammar = treepattern(size)
1002 |     local start = 0
1003 |     grammar.p[start].tag = TGrammar
1004 |     grammar.p[start].val = n
1005 |     valuetable[grammar.id] = buildgrammar(grammar, rules, n, start + 1, valuetable)
1006 |     finalfix(true, postab, grammar, start + 1, valuetable[grammar.id])
1007 |     initialrulename(grammar, rules[1], valuetable[grammar.id])
1008 |     verifygrammar(grammar, 0, valuetable[grammar.id])
1009 |     return grammar
1010 | end
1011 | 
1012 | -- ======================================================
1013 | 
1014 | -- remove duplicity from value table
1015 | 
1016 | local function reducevaluetable(p)
1017 |     local vtable = valuetable[p.id]
1018 |     local value = {}
1019 |     local newvaluetable = {}
1020 | 
1021 |     local function check(v)
1022 |         if v > 0 then
1023 |             local ord = value[vtable[v]]
1024 |             if not ord then
1025 |                 newvaluetable[#newvaluetable + 1] = vtable[v]
1026 |                 ord = #newvaluetable
1027 |                 value[vtable[v]] = ord
1028 |             end
1029 |             return ord
1030 |         end
1031 |         return 0
1032 |     end
1033 | 
1034 |     local function itertree(p, index)
1035 |         local tag = p.p[index].tag
1036 |         if tag == TSet or tag == TCall or tag == TOpenCall or
1037 |                 tag == TRule or tag == TCapture or tag == TRunTime then
1038 |             p.p[index].val = check(p.p[index].val)
1039 |         end
1040 |         local ns = numsiblings[tag + 1]
1041 |         if ns == 0 then
1042 |         elseif ns == 1 then
1043 |             return itertree(p, index + 1)
1044 |         elseif ns == 2 then
1045 |             itertree(p, index + 1)
1046 |             return itertree(p, index + p.p[index].ps)
1047 |         else
1048 |             assert(false)
1049 |         end
1050 |     end
1051 | 
1052 |     if p.treesize > 0 then
1053 |         itertree(p, 0)
1054 |     end
1055 |     if p.code ~= nil then
1056 |         for i = 0, p.code.size - 1 do
1057 |             local code = p.code.p[i].code
1058 |             if code == ICall or code == IJmp then
1059 |                 p.code.p[i].aux = check(p.code.p[i].aux)
1060 |             elseif code == ISet or code == ITestSet or code == ISpan then
1061 |                 p.code.p[i].val = check(p.code.p[i].val)
1062 |             elseif code == IOpenCapture or code == IFullCapture then
1063 |                 p.code.p[i].offset = check(p.code.p[i].offset)
1064 |             end
1065 |         end
1066 |     end
1067 |     valuetable[p.id] = newvaluetable
1068 | end
1069 | 
1070 | 
1071 | local function checkalt(tree)
1072 |     local notchecked = {}
1073 |     local notinalternativerules = {}
1074 | 
1075 |     local function iter(tree, index, choice, rule)
1076 |         local tag = tree[index].tag
1077 |         if tag == TCapture and bit.band(tree[index].cap, 0xffff) == Cgroup then
1078 |             if not choice then
1079 |                 if rule then
1080 |                     notchecked[rule] = index
1081 |                 end
1082 |             else
1083 |                 tree[index].cap = bit.bor(tree[index].cap, BCapcandelete)
1084 |             end
1085 |         elseif tag == TChoice then
1086 |             choice = true
1087 |         elseif tag == TRule then
1088 |             rule = tree[index].val
1089 |             if bit.band(tree[index].cap, 0xffff) - 1 == 0 then
1090 |                 notinalternativerules[rule] = notinalternativerules[rule] or true
1091 |             end
1092 |         elseif tag == TCall then
1093 |             local r = tree[index].val
1094 |             if not choice then
1095 |                 notinalternativerules[r] = notinalternativerules[r] or true
1096 |             end
1097 |         end
1098 |         local sibs = numsiblings[tree[index].tag + 1] or 0
1099 |         if sibs >= 1 then
1100 |             iter(tree, index + 1, choice, rule)
1101 |             if sibs >= 2 then
1102 |                 return iter(tree, index + tree[index].ps, choice, rule)
1103 |             end
1104 |         end
1105 |     end
1106 | 
1107 |     iter(tree, 0)
1108 |     for k, v in pairs(notchecked) do
1109 |         if not notinalternativerules[k] then
1110 |             tree[v].cap = bit.bor(tree[v].cap, BCapcandelete)
1111 |         end
1112 |     end
1113 | end
1114 | 
1115 | 
1116 | local function prepcompile(p, index)
1117 |     finalfix(false, nil, p, index, valuetable[p.id])
1118 |     checkalt(p.p)
1119 |     lpcode.compile(p, index, valuetable[p.id])
1120 |     reducevaluetable(p)
1121 |     return p.code
1122 | end
1123 | 
1124 | 
1125 | local function lp_printtree(pat, c)
1126 |     assert(pat.treesize > 0)
1127 |     if c then
1128 |         finalfix(false, nil, pat, 0, valuetable[pat.id])
1129 |     end
1130 |     lpprint.printtree(pat.p, 0, 0, valuetable[pat.id])
1131 | end
1132 | 
1133 | 
1134 | local function lp_printcode(pat)
1135 |     -- not compiled yet?
1136 |     if pat.code == nil then
1137 |         prepcompile(pat, 0)
1138 |     end
1139 |     lpprint.printpatt(pat.code, valuetable[pat.id])
1140 | end
1141 | 
1142 | 
1143 | -- Main match function
1144 | 
1145 | local function lp_match(pat, s, init, ...)
1146 |     local p = ffi.istype(treepattern, pat) and pat or getpatt(pat)
1147 |     p.code = p.code ~= nil and p.code or prepcompile(p, 0)
1148 |     return lpvm.match(p, s, init, valuetable[p.id], ...)
1149 | end
1150 | 
1151 | local function lp_streammatch(pat, init, ...)
1152 |     local p = ffi.istype(treepattern, pat) and pat or getpatt(pat)
1153 |     p.code = p.code ~= nil and p.code or prepcompile(p, 0)
1154 |     return lpvm.streammatch(p, init, valuetable[p.id], ...)
1155 | end
1156 | 
1157 | -- Only for testing purpose
1158 | -- stream emulation (send all chars from string one char after char)
1159 | local function lp_emulatestreammatch(pat, s, init, ...)
1160 |     local p = ffi.istype(treepattern, pat) and pat or getpatt(pat)
1161 |     p.code = p.code ~= nil and p.code or prepcompile(p, 0)
1162 |     return lpvm.emulatestreammatch(p, s, init, valuetable[p.id], ...)
1163 | end
1164 | 
1165 | -- {======================================================
1166 | -- Library creation and functions not related to matching
1167 | -- =======================================================
1168 | 
1169 | local function lp_setmax(val)
1170 |     lpvm.setmax(val)
1171 | end
1172 | 
1173 | local function lp_setmaxbehind(val)
1174 |     lpvm.setmaxbehind(val)
1175 | end
1176 | 
1177 | local function lp_enableleftrecursion(val)
1178 |     LREnable = val
1179 | end
1180 | 
1181 | local function lp_version()
1182 |     return VERSION
1183 | end
1184 | 
1185 | 
1186 | local function lp_type(pat)
1187 |     if ffi.istype(treepattern, pat) then
1188 |         return "pattern"
1189 |     end
1190 | end
1191 | 
1192 | 
1193 | local function createcat(tab, catname, catfce)
1194 |     local t, set = newcharset()
1195 |     for i = 0, 255 do
1196 |         if catfce(i) ~= 0 then
1197 |             set[rshift(i, 5)] = bor(set[rshift(i, 5)], lshift(1, band(i, 31)))
1198 |         end
1199 |     end
1200 |     tab[catname] = t
1201 | end
1202 | 
1203 | 
1204 | local function lp_locale(tab)
1205 |     tab = tab or {}
1206 |     createcat(tab, "alnum", function(c) return ffi.C.isalnum(c) end)
1207 |     createcat(tab, "alpha", function(c) return ffi.C.isalpha(c) end)
1208 |     createcat(tab, "cntrl", function(c) return ffi.C.iscntrl(c) end)
1209 |     createcat(tab, "digit", function(c) return ffi.C.isdigit(c) end)
1210 |     createcat(tab, "graph", function(c) return ffi.C.isgraph(c) end)
1211 |     createcat(tab, "lower", function(c) return ffi.C.islower(c) end)
1212 |     createcat(tab, "print", function(c) return ffi.C.isprint(c) end)
1213 |     createcat(tab, "punct", function(c) return ffi.C.ispunct(c) end)
1214 |     createcat(tab, "space", function(c) return ffi.C.isspace(c) end)
1215 |     createcat(tab, "upper", function(c) return ffi.C.isupper(c) end)
1216 |     createcat(tab, "xdigit", function(c) return ffi.C.isxdigit(c) end)
1217 |     return tab
1218 | end
1219 | 
1220 | 
1221 | local function lp_new(ct, size)
1222 |     local pat = ffi.new(ct, size)
1223 |     pat.treesize = size
1224 |     patternid = patternid + 1
1225 |     pat.id = patternid
1226 |     return pat
1227 | end
1228 | 
1229 | 
1230 | local function lp_gc(ct)
1231 |     valuetable[ct.id] = nil
1232 |     if ct.code ~= nil then
1233 |         ffi.C.free(ct.code.p)
1234 |         ffi.C.free(ct.code)
1235 |     end
1236 | end
1237 | 
1238 | local function lp_eq(ct1, ct2)
1239 |     return tostring(ct1) == tostring(ct2)
1240 | end
1241 | 
1242 | local function lp_load(str, fcetab)
1243 |     local pat, t = lpvm.load(str, fcetab, true)
1244 |     valuetable[pat.id] = t
1245 |     return pat
1246 | end
1247 | 
1248 | local function lp_loadfile(fname, fcetab)
1249 |     local pat, t = lpvm.loadfile(fname, fcetab, true)
1250 |     valuetable[pat.id] = t
1251 |     return pat
1252 | end
1253 | 
1254 | local function lp_dump(ct, tree)
1255 |     local funccount = 0
1256 |     -- not compiled yet?
1257 |     if ct.code == nil then
1258 |         prepcompile(ct, 0)
1259 |     end
1260 |     local out = {}
1261 |     if tree then
1262 |         out[#out + 1] = ffi.string(uint32(ct.treesize), 4)
1263 |         out[#out + 1] = ffi.string(ct.p, ffi.sizeof(treepatternelement) * ct.treesize)
1264 |     else
1265 |         out[#out + 1] = ffi.string(uint32(0), 4)
1266 |     end
1267 |     out[#out + 1] = ffi.string(uint32(ct.code.size), 4)
1268 |     out[#out + 1] = ffi.string(ct.code.p, ct.code.size * ffi.sizeof(patternelement))
1269 |     local t = valuetable[ct.id]
1270 |     local len = t and #t or 0
1271 |     out[#out + 1] = ffi.string(uint32(len), 4)
1272 |     if len > 0 then
1273 |         for _, val in ipairs(t) do
1274 |             local typ = type(val)
1275 |             if typ == 'string' then
1276 |                 out[#out + 1] = 'str'
1277 |                 out[#out + 1] = ffi.string(uint32(#val), 4)
1278 |                 out[#out + 1] = val
1279 |             elseif typ == 'number' then
1280 |                 local val = tostring(val)
1281 |                 out[#out + 1] = 'num'
1282 |                 out[#out + 1] = ffi.string(uint32(#val), 4)
1283 |                 out[#out + 1] = val
1284 |             elseif typ == 'cdata' then
1285 |                 out[#out + 1] = 'cdt'
1286 |                 out[#out + 1] = ffi.string(val, ffi.sizeof(val))
1287 |             elseif typ == 'function' then
1288 |                 out[#out + 1] = 'fnc'
1289 |                 funccount = funccount + 1
1290 |                 local name = funcnames[val] or ('FNAME%03d'):format(funccount)
1291 |                 out[#out + 1] = ffi.string(uint32(#name), 4)
1292 |                 out[#out + 1] = name
1293 |                 if not funcnames[val] and debug.getupvalue(val, 1) then
1294 |                     io.write(("Patterns function (%d) contains upvalue (%s) - use symbol name for function (%s).\n"):format(funccount, debug.getupvalue(val, 1), name), 0)
1295 |                 end
1296 |                 local data = string.dump(val, true)
1297 |                 out[#out + 1] = ffi.string(uint32(#data), 4)
1298 |                 out[#out + 1] = data
1299 |             else
1300 |                 error(("Type '%s' NYI for dump"):format(typ), 0)
1301 |             end
1302 |         end
1303 |     end
1304 |     return table.concat(out)
1305 | end
1306 | 
1307 | local function lp_save(ct, fname, tree)
1308 |     local file = assert(io.open(fname, 'wb'))
1309 |     file:write(lp_dump(ct, tree))
1310 |     file:close()
1311 | end
1312 | 
1313 | 
1314 | local pattreg = {
1315 |     ["ptree"] = lp_printtree,
1316 |     ["pcode"] = lp_printcode,
1317 |     ["match"] = lp_match,
1318 |     ["streammatch"] = lp_streammatch,
1319 |     ["emulatestreammatch"] = lp_emulatestreammatch,
1320 |     ["setmaxbehind"] = lp_setmaxbehind,
1321 |     ["B"] = lp_behind,
1322 |     ["V"] = lp_V,
1323 |     ["C"] = lp_simplecapture,
1324 |     ["Cc"] = lp_constcapture,
1325 |     ["Cmt"] = lp_matchtime,
1326 |     ["Cb"] = lp_backref,
1327 |     ["Carg"] = lp_argcapture,
1328 |     ["Cp"] = lp_poscapture,
1329 |     ["Cs"] = lp_substcapture,
1330 |     ["Ct"] = lp_tablecapture,
1331 |     ["Cf"] = lp_foldcapture,
1332 |     ["Cg"] = lp_groupcapture,
1333 |     ["P"] = lp_P,
1334 |     ["S"] = lp_set,
1335 |     ["R"] = lp_range,
1336 |     ["L"] = lp_and,
1337 |     ["locale"] = lp_locale,
1338 |     ["version"] = lp_version,
1339 |     ["setmaxstack"] = lp_setmax,
1340 |     ["type"] = lp_type,
1341 |     ["enableleftrecursion"] = lp_enableleftrecursion,
1342 |     ["enablememoization"] = lpvm.enablememoization,
1343 |     ["enabletracing"] = lpvm.enabletracing,
1344 |     ["save"] = lp_save,
1345 |     ["dump"] = lp_dump,
1346 |     ["load"] = lp_load,
1347 |     ["loadfile"] = lp_loadfile,
1348 |     ["__mul"] = lp_seq,
1349 |     ["__add"] = lp_choice,
1350 |     ["__pow"] = lp_star,
1351 |     ["__len"] = lp_and,
1352 |     ["__div"] = lp_divcapture,
1353 |     ["__unm"] = lp_not,
1354 |     ["__sub"] = lp_sub,
1355 | }
1356 | 
1357 | local metareg = {
1358 |     ["__gc"] = lp_gc,
1359 |     ["__new"] = lp_new,
1360 |     ["__mul"] = lp_seq,
1361 |     ["__add"] = lp_choice,
1362 |     ["__pow"] = lp_star,
1363 |     ["__len"] = lp_and,
1364 |     ["__div"] = lp_divcapture,
1365 |     ["__unm"] = lp_not,
1366 |     ["__sub"] = lp_sub,
1367 |     ["__eq"] = lp_eq,
1368 |     ["__index"] = pattreg
1369 | }
1370 | 
1371 | ffi.metatype(treepattern, metareg)
1372 | 
1373 | return pattreg
1374 | 


--------------------------------------------------------------------------------
/src/lpprint.lua:
--------------------------------------------------------------------------------
  1 | --[[
  2 | LPEGLJ
  3 | lpprint.lua
  4 | Tree, code and debug print function (only for debuging)
  5 | Copyright (C) 2014 Rostislav Sacek.
  6 | based on LPeg v1.0 - PEG pattern matching for Lua
  7 | Lua.org & PUC-Rio  written by Roberto Ierusalimschy
  8 | http://www.inf.puc-rio.br/~roberto/lpeg/
  9 | 
 10 | ** Permission is hereby granted, free of charge, to any person obtaining
 11 | ** a copy of this software and associated documentation files (the
 12 | ** "Software"), to deal in the Software without restriction, including
 13 | ** without limitation the rights to use, copy, modify, merge, publish,
 14 | ** distribute, sublicense, and/or sell copies of the Software, and to
 15 | ** permit persons to whom the Software is furnished to do so, subject to
 16 | ** the following conditions:
 17 | **
 18 | ** The above copyright notice and this permission notice shall be
 19 | ** included in all copies or substantial portions of the Software.
 20 | **
 21 | ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 22 | ** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 23 | ** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 24 | ** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 25 | ** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 26 | ** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 27 | ** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 28 | **
 29 | ** [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
 30 | --]]
 31 | 
 32 | local ffi = require"ffi"
 33 | local band, rshift, lshift = bit.band, bit.rshift, bit.lshift
 34 | 
 35 | ffi.cdef[[
 36 |   int isprint ( int c );
 37 | ]]
 38 | 
 39 | local RuleLR = 0x10000
 40 | local Ruleused = 0x20000
 41 | 
 42 | -- {======================================================
 43 | -- Printing patterns (for debugging)
 44 | -- =======================================================
 45 | 
 46 | local TChar = 0
 47 | local TSet = 1
 48 | local TAny = 2 -- standard PEG elements
 49 | local TTrue = 3
 50 | local TFalse = 4
 51 | local TRep = 5
 52 | local TSeq = 6
 53 | local TChoice = 7
 54 | local TNot = 8
 55 | local TAnd = 9
 56 | local TCall = 10
 57 | local TOpenCall = 11
 58 | local TRule = 12 -- sib1 is rule's pattern, sib2 is 'next' rule
 59 | local TGrammar = 13 -- sib1 is initial (and first) rule
 60 | local TBehind = 14 -- match behind
 61 | local TCapture = 15 -- regular capture
 62 | local TRunTime = 16 -- run-time capture
 63 | 
 64 | local IAny = 0 -- if no char, fail
 65 | local IChar = 1 -- if char != aux, fail
 66 | local ISet = 2 -- if char not in val, fail
 67 | local ITestAny = 3 -- in no char, jump to 'offset'
 68 | local ITestChar = 4 -- if char != aux, jump to 'offset'
 69 | local ITestSet = 5 -- if char not in val, jump to 'offset'
 70 | local ISpan = 6 -- read a span of chars in val
 71 | local IBehind = 7 -- walk back 'aux' characters (fail if not possible)
 72 | local IRet = 8 -- return from a rule
 73 | local IEnd = 9 -- end of pattern
 74 | local IChoice = 10 -- stack a choice; next fail will jump to 'offset'
 75 | local IJmp = 11 -- jump to 'offset'
 76 | local ICall = 12 -- call rule at 'offset'
 77 | local IOpenCall = 13 -- call rule number 'offset' (must be closed to a ICall)
 78 | local ICommit = 14 -- pop choice and jump to 'offset'
 79 | local IPartialCommit = 15 -- update top choice to current position and jump
 80 | local IBackCommit = 16 -- "fails" but jump to its own 'offset'
 81 | local IFailTwice = 17 -- pop one choice and then fail
 82 | local IFail = 18 -- go back to saved state on choice and jump to saved offset
 83 | local IGiveup = 19 -- internal use
 84 | local IFullCapture = 20 -- complete capture of last 'off' chars
 85 | local IOpenCapture = 21 -- start a capture
 86 | local ICloseCapture = 22
 87 | local ICloseRunTime = 23
 88 | 
 89 | local Cclose = 0
 90 | local Cposition = 1
 91 | local Cconst = 2
 92 | local Cbackref = 3
 93 | local Carg = 4
 94 | local Csimple = 5
 95 | local Ctable = 6
 96 | local Cfunction = 7
 97 | local Cquery = 8
 98 | local Cstring = 9
 99 | local Cnum = 10
100 | local Csubst = 11
101 | local Cfold = 12
102 | local Cruntime = 13
103 | local Cgroup = 14
104 | 
105 | 
106 | -- number of siblings for each tree
107 | local numsiblings = {
108 |     [TRep] = 1,
109 |     [TSeq] = 2,
110 |     [TChoice] = 2,
111 |     [TNot] = 1,
112 |     [TAnd] = 1,
113 |     [TRule] = 2,
114 |     [TGrammar] = 1,
115 |     [TBehind] = 1,
116 |     [TCapture] = 1,
117 |     [TRunTime] = 1,
118 | }
119 | local names = {
120 |     [IAny] = "any",
121 |     [IChar] = "char",
122 |     [ISet] = "set",
123 |     [ITestAny] = "testany",
124 |     [ITestChar] = "testchar",
125 |     [ITestSet] = "testset",
126 |     [ISpan] = "span",
127 |     [IBehind] = "behind",
128 |     [IRet] = "ret",
129 |     [IEnd] = "end",
130 |     [IChoice] = "choice",
131 |     [IJmp] = "jmp",
132 |     [ICall] = "call",
133 |     [IOpenCall] = "open_call",
134 |     [ICommit] = "commit",
135 |     [IPartialCommit] = "partial_commit",
136 |     [IBackCommit] = "back_commit",
137 |     [IFailTwice] = "failtwice",
138 |     [IFail] = "fail",
139 |     [IGiveup] = "giveup",
140 |     [IFullCapture] = "fullcapture",
141 |     [IOpenCapture] = "opencapture",
142 |     [ICloseCapture] = "closecapture",
143 |     [ICloseRunTime] = "closeruntime"
144 | }
145 | 
146 | local function printcharset(st)
147 |     io.write("[");
148 |     local i = 0
149 |     while i <= 255 do
150 |         local first = i;
151 |         while band(st[rshift(i, 5)], lshift(1, band(i, 31))) ~= 0 and i <= 255 do
152 |             i = i + 1
153 |         end
154 |         if i - 1 == first then -- unary range?
155 |             io.write(("(%02x)"):format(first))
156 |         elseif i - 1 > first then -- non-empty range?
157 |             io.write(("(%02x-%02x)"):format(first, i - 1))
158 |         end
159 |         i = i + 1
160 |     end
161 |     io.write("]")
162 | end
163 | 
164 | local modes = {
165 |     [Cclose] = "close",
166 |     [Cposition] = "position",
167 |     [Cconst] = "constant",
168 |     [Cbackref] = "backref",
169 |     [Carg] = "argument",
170 |     [Csimple] = "simple",
171 |     [Ctable] = "table",
172 |     [Cfunction] = "function",
173 |     [Cquery] = "query",
174 |     [Cstring] = "string",
175 |     [Cnum] = "num",
176 |     [Csubst] = "substitution",
177 |     [Cfold] = "fold",
178 |     [Cruntime] = "runtime",
179 |     [Cgroup] = "group"
180 | }
181 | 
182 | local function printcapkind(kind)
183 |     io.write(("%s"):format(modes[kind]))
184 | end
185 | 
186 | local function printjmp(p, index)
187 |     io.write(("-> %d"):format(index + p[index].offset))
188 | end
189 | 
190 | local function printrulename(p, index, rulenames)
191 |     if rulenames and rulenames[index + p[index].offset] then
192 |         io.write(' ', rulenames[index + p[index].offset])
193 |     end
194 | end
195 | 
196 | local function printinst(p, index, valuetable, rulenames)
197 |     local code = p[index].code
198 |     if rulenames and rulenames[index] then
199 |         io.write(rulenames[index], '\n')
200 |     end
201 |     io.write(("%04d: %s "):format(index, names[code]))
202 |     if code == IChar then
203 |         io.write(("'%s'"):format(string.char(p[index].val)))
204 |     elseif code == ITestChar then
205 |         io.write(("'%s'"):format(string.char(p[index].val)))
206 |         printjmp(p, index)
207 |         printrulename(p, index, rulenames)
208 |     elseif code == IFullCapture then
209 |         printcapkind(band(p[index].val, 0x0f));
210 |         io.write((" (size = %d)  (idx = %s)"):format(band(rshift(p[index].val, 4), 0xF), tostring(valuetable[p[index].offset])))
211 |     elseif code == IOpenCapture then
212 |         printcapkind(band(p[index].val, 0x0f))
213 |         io.write((" (idx = %s)"):format(tostring(valuetable[p[index].offset])))
214 |     elseif code == ISet then
215 |         printcharset(valuetable[p[index].val]);
216 |     elseif code == ITestSet then
217 |         printcharset(valuetable[p[index].val])
218 |         printjmp(p, index);
219 |         printrulename(p, index, rulenames)
220 |     elseif code == ISpan then
221 |         printcharset(valuetable[p[index].val]);
222 |     elseif code == IOpenCall then
223 |         io.write(("-> %d"):format(p[index].offset))
224 |     elseif code == IBehind then
225 |         io.write(("%d"):format(p[index].val))
226 |     elseif code == IJmp or code == ICall or code == ICommit or code == IChoice or
227 |             code == IPartialCommit or code == IBackCommit or code == ITestAny then
228 |         printjmp(p, index);
229 |         if (code == ICall or code == IJmp) and p[index].aux > 0 then
230 |             io.write(' ', valuetable[p[index].aux])
231 |         else
232 |             printrulename(p, index, rulenames)
233 |         end
234 |     end
235 |     io.write("\n")
236 | end
237 | 
238 | 
239 | local function printpatt(p, valuetable)
240 |     local ruleNames = {}
241 |     for i = 0, p.size - 1 do
242 |         local code = p.p[i].code
243 |         if (code == ICall or code == IJmp) and p.p[i].aux > 0 then
244 |             local index = i + p.p[i].offset
245 |             ruleNames[index] = valuetable[p.p[i].aux]
246 |         end
247 |     end
248 |     for i = 0, p.size - 1 do
249 |         printinst(p.p, i, valuetable, ruleNames)
250 |     end
251 | end
252 | 
253 | 
254 | local function printcap(cap, index, valuetable)
255 |     printcapkind(cap[index].kind)
256 |     io.write((" (idx: %s - size: %d) -> %d\n"):format(valuetable[cap[index].idx], cap[index].siz, cap[index].s))
257 | end
258 | 
259 | 
260 | local function printcaplist(cap, limit, valuetable)
261 |     io.write(">======\n")
262 |     local index = 0
263 |     while cap[index].s and index < limit do
264 |         printcap(cap, index, valuetable)
265 |         index = index + 1
266 |     end
267 |     io.write("=======\n")
268 | end
269 | 
270 | -- ======================================================
271 | 
272 | 
273 | 
274 | -- {======================================================
275 | -- Printing trees (for debugging)
276 | -- =======================================================
277 | 
278 | local tagnames = {
279 |     [TChar] = "char",
280 |     [TSet] = "set",
281 |     [TAny] = "any",
282 |     [TTrue] = "true",
283 |     [TFalse] = "false",
284 |     [TRep] = "rep",
285 |     [TSeq] = "seq",
286 |     [TChoice] = "choice",
287 |     [TNot] = "not",
288 |     [TAnd] = "and",
289 |     [TCall] = "call",
290 |     [TOpenCall] = "opencall",
291 |     [TRule] = "rule",
292 |     [TGrammar] = "grammar",
293 |     [TBehind] = "behind",
294 |     [TCapture] = "capture",
295 |     [TRunTime] = "run-time"
296 | }
297 | 
298 | 
299 | local function printtree(tree, ident, index, valuetable)
300 |     for i = 1, ident do
301 |         io.write(" ")
302 |     end
303 |     local tag = tree[index].tag
304 |     io.write(("%s"):format(tagnames[tag]))
305 |     if tag == TChar then
306 |         local c = tree[index].val
307 |         if ffi.C.isprint(c) then
308 |             io.write((" '%c'\n"):format(c))
309 |         else
310 |             io.write((" (%02X)\n"):format(c))
311 |         end
312 |     elseif tag == TSet then
313 |         printcharset(valuetable[tree[index].val]);
314 |         io.write("\n")
315 |     elseif tag == TOpenCall or tag == TCall then
316 |         io.write((" key: %s\n"):format(tostring(valuetable[tree[index].val])))
317 |     elseif tag == TBehind then
318 |         io.write((" %d\n"):format(tree[index].val))
319 |         printtree(tree, ident + 2, index + 1, valuetable);
320 |     elseif tag == TCapture then
321 |         io.write((" cap: %s   n: %s\n"):format(modes[bit.band(tree[index].cap, 0xffff)], valuetable[tree[index].val]))
322 |         printtree(tree, ident + 2, index + 1, valuetable);
323 |     elseif tag == TRule then
324 |         local extra = bit.band(tree[index].cap, RuleLR) == RuleLR and ' left recursive' or ''
325 |         extra = extra .. (bit.band(tree[index].cap, Ruleused) ~= Ruleused and ' not used' or '')
326 |         io.write((" n: %d  key: %s%s\n"):format(bit.band(tree[index].cap, 0xffff) - 1, valuetable[tree[index].val], extra))
327 |         printtree(tree, ident + 2, index + 1, valuetable);
328 |         -- do not print next rule as a sibling
329 |     elseif tag == TGrammar then
330 |         local ruleindex = index + 1
331 |         io.write((" %d\n"):format(tree[index].val)) -- number of rules
332 |         for i = 1, tree[index].val do
333 |             printtree(tree, ident + 2, ruleindex, valuetable);
334 |             ruleindex = ruleindex + tree[ruleindex].ps
335 |         end
336 |         assert(tree[ruleindex].tag == TTrue); -- sentinel
337 |     else
338 |         local sibs = numsiblings[tree[index].tag] or 0
339 |         io.write("\n")
340 |         if sibs >= 1 then
341 |             printtree(tree, ident + 2, index + 1, valuetable);
342 |             if sibs >= 2 then
343 |                 printtree(tree, ident + 2, index + tree[index].ps, valuetable)
344 |             end
345 |         end
346 |     end
347 | end
348 | 
349 | -- }====================================================== */
350 | 
351 | return {
352 |     printtree = printtree,
353 |     printpatt = printpatt,
354 |     printcaplist = printcaplist,
355 |     printinst = printinst
356 | }


--------------------------------------------------------------------------------
/src/lpvm.lua:
--------------------------------------------------------------------------------
   1 | --[[
   2 | LPEGLJ
   3 | lpvm.lua
   4 | Virtual machine
   5 | Copyright (C) 2014 Rostislav Sacek.
   6 | based on LPeg v1.0 - PEG pattern matching for Lua
   7 | Lua.org & PUC-Rio  written by Roberto Ierusalimschy
   8 | http://www.inf.puc-rio.br/~roberto/lpeg/
   9 | 
  10 | ** Permission is hereby granted, free of charge, to any person obtaining
  11 | ** a copy of this software and associated documentation files (the
  12 | ** "Software"), to deal in the Software without restriction, including
  13 | ** without limitation the rights to use, copy, modify, merge, publish,
  14 | ** distribute, sublicense, and/or sell copies of the Software, and to
  15 | ** permit persons to whom the Software is furnished to do so, subject to
  16 | ** the following conditions:
  17 | **
  18 | ** The above copyright notice and this permission notice shall be
  19 | ** included in all copies or substantial portions of the Software.
  20 | **
  21 | ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  22 | ** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  23 | ** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  24 | ** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  25 | ** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  26 | ** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  27 | ** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  28 | **
  29 | ** [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
  30 | --]]
  31 | 
  32 | local ffi = require "ffi"
  33 | local lpcap = require "lpcap"
  34 | --[[ Only for debug purpose
  35 | local lpprint = require"lpprint"
  36 | --]]
  37 | 
  38 | local band, rshift, lshift = bit.band, bit.rshift, bit.lshift
  39 | 
  40 | -- {======================================================
  41 | -- Virtual Machine
  42 | -- =======================================================
  43 | 
  44 | -- Interpret the result of a dynamic capture: false -> fail;
  45 | -- true -> keep current position; number -> next position.
  46 | -- Return new subject position. 'fr' is stack index where
  47 | -- is the result; 'curr' is current subject position; 'limit'
  48 | -- is subject's size.
  49 | 
  50 | local MAXBEHINDPREDICATE = 255 -- max behind for Look-behind predicate
  51 | local MAXOFF = 0xF -- maximum for full capture
  52 | local MAXBEHIND = math.max(MAXBEHINDPREDICATE, MAXOFF) -- maximum before current pos
  53 | local INITBACK = 400 -- default maximum size for call/backtrack stack
  54 | 
  55 | local IAny = 0 -- if no char, fail
  56 | local IChar = 1 -- if char != val, fail
  57 | local ISet = 2 -- if char not in val, fail
  58 | local ITestAny = 3 -- in no char, jump to 'offset'
  59 | local ITestChar = 4 -- if char != val, jump to 'offset'
  60 | local ITestSet = 5 -- if char not in val, jump to 'offset'
  61 | local ISpan = 6 -- read a span of chars in val
  62 | local IBehind = 7 -- walk back 'val' characters (fail if not possible)
  63 | local IRet = 8 -- return from a rule
  64 | local IEnd = 9 -- end of pattern
  65 | local IChoice = 10 -- stack a choice; next fail will jump to 'offset'
  66 | local IJmp = 11 -- jump to 'offset'
  67 | local ICall = 12 -- call rule at 'offset'
  68 | local IOpenCall = 13 -- call rule number 'offset' (must be closed to a ICall)
  69 | local ICommit = 14 -- pop choice and jump to 'offset'
  70 | local IPartialCommit = 15 -- update top choice to current position and jump
  71 | local IBackCommit = 16 -- "fails" but jump to its own 'offset'
  72 | local IFailTwice = 17 -- pop one choice and then fail
  73 | local IFail = 18 -- go back to saved state on choice and jump to saved offset
  74 | local IGiveup = 19 -- internal use
  75 | local IFullCapture = 20 -- complete capture of last 'off' chars
  76 | local IOpenCapture = 21 -- start a capture
  77 | local ICloseCapture = 22
  78 | local ICloseRunTime = 23
  79 | 
  80 | local Cclose = 0
  81 | local Cposition = 1
  82 | local Cconst = 2
  83 | local Cbackref = 3
  84 | local Carg = 4
  85 | local Csimple = 5
  86 | local Ctable = 6
  87 | local Cfunction = 7
  88 | local Cquery = 8
  89 | local Cstring = 9
  90 | local Cnum = 10
  91 | local Csubst = 11
  92 | local Cfold = 12
  93 | local Cruntime = 13
  94 | local Cgroup = 14
  95 | 
  96 | local BCapcandelete = 0x30000
  97 | local maxstack = INITBACK
  98 | local maxcapturedefault = 100
  99 | local maxmemo = 1000
 100 | local usememoization = false
 101 | local trace = false
 102 | 
 103 | local FAIL = -1
 104 | local LRFAIL = -1
 105 | local VOID = -2
 106 | local CHOICE = -3
 107 | local CALL = -4
 108 | 
 109 | ffi.cdef [[
 110 | typedef struct {
 111 |           int code;
 112 |           int val;
 113 |           int offset;
 114 |           int aux;
 115 |          } PATTERN_ELEMENT;
 116 | typedef struct {
 117 |           int allocsize;
 118 |           int size;
 119 |           PATTERN_ELEMENT *p;
 120 |          } PATTERN;
 121 | typedef struct {
 122 |           int tag;
 123 |           int val;
 124 |           int ps;
 125 |           int cap;
 126 |          } TREEPATTERN_ELEMENT;
 127 | typedef struct {
 128 |           int id;
 129 |           int treesize;
 130 |           PATTERN *code;
 131 |           TREEPATTERN_ELEMENT p[?];
 132 |          } TREEPATTERN;
 133 | 
 134 | typedef struct {
 135 |           double s;
 136 |           double X;
 137 |           double memos;
 138 |           int p;
 139 |           int caplevel;
 140 |           int pA;
 141 |           int valuetabletop;
 142 |          } STACK;
 143 | 
 144 | typedef struct {
 145 |           double s;
 146 |           int siz;
 147 |           int idx;
 148 |           int kind;
 149 |           int candelete;
 150 |          } CAPTURE;
 151 | 
 152 | void *malloc( size_t size );
 153 | void free( void *memblock );
 154 | void *realloc( void *memblock, size_t size );
 155 | ]]
 156 | 
 157 | local treepatternelement = ffi.typeof('TREEPATTERN_ELEMENT')
 158 | local treepattern = ffi.typeof('TREEPATTERN')
 159 | local patternelement = ffi.typeof('PATTERN_ELEMENT')
 160 | local pattern = ffi.typeof('PATTERN')
 161 | local settype = ffi.typeof('int32_t[8]')
 162 | 
 163 | local function resdyncaptures(fr, curr, limit, checkstreamlen)
 164 |     local typ = type(fr)
 165 |     -- false value?
 166 |     if not fr then
 167 |         return FAIL -- and fail
 168 |     elseif typ == 'boolean' then
 169 |         -- true?
 170 |         return curr -- keep current position
 171 |     else
 172 |         local res = fr -- new position
 173 |         if res < curr or (limit and res > limit) or (not limit and checkstreamlen and not checkstreamlen(res - 2)) then
 174 |             error("invalid position returned by match-time capture", 0)
 175 |         end
 176 |         return res
 177 |     end
 178 |     assert(false)
 179 | end
 180 | 
 181 | 
 182 | -- Add capture values returned by a dynamic capture to the capture list
 183 | -- 'base', nested inside a group capture. 'fd' indexes the first capture
 184 | -- value, 'n' is the number of values (at least 1).
 185 | 
 186 | local function adddyncaptures(s, base, index, n, fd, valuetable)
 187 |     -- Cgroup capture is already there
 188 |     assert(base[index].kind == Cgroup and base[index].siz == 0)
 189 |     base[index].idx = 0 -- make it an anonymous group
 190 |     base[index + 1] = {}
 191 |     -- add runtime captures
 192 |     for i = 1, n do
 193 |         base[index + i].kind = Cruntime
 194 |         base[index + i].siz = 1 -- mark it as closed
 195 |         local ind = #valuetable + 1
 196 |         valuetable[ind] = fd[i + 1]
 197 |         base[index + i].idx = ind -- stack index of capture value
 198 |         base[index + i].s = s
 199 |         base[index + i + 1] = {}
 200 |     end
 201 |     base[index + n + 1].kind = Cclose -- close group
 202 |     base[index + n + 1].siz = 1
 203 |     base[index + n + 1].s = s
 204 |     base[index + n + 2] = {}
 205 | end
 206 | 
 207 | 
 208 | -- Opcode interpreter
 209 | 
 210 | local function match(stream, last, o, s, op, valuetable, ...)
 211 |     local arg = { ... }
 212 |     local argcount = select('#', ...)
 213 |     local len = #o
 214 |     local ptr = ffi.cast('const unsigned char*', o)
 215 |     s = s - 1
 216 |     local stackptr = 0 -- point to first empty slot in stack
 217 |     local captop = 0 -- point to first empty slot in captures
 218 |     local STACK = ffi.new("STACK[?]", INITBACK)
 219 |     local CAPTURE = ffi.new("CAPTURE[?]", maxcapturedefault)
 220 |     local CAPTURESTACK = { { capture = CAPTURE, captop = captop, maxcapture = maxcapturedefault } }
 221 |     local capturestackptr = #CAPTURESTACK
 222 |     local maxcapture = maxcapturedefault
 223 |     local stacklimit = INITBACK
 224 |     local L = {}
 225 |     local Memo1, Memo2 = {}, {}
 226 |     local memoind = 0
 227 |     local maxpointer = 2 ^ math.ceil(math.log(op.size) / math.log(2))
 228 |     local nocapturereleased = true
 229 | 
 230 |     local p = 0 -- current instruction
 231 |     local streambufsize = 2 ^ 8
 232 |     local streambufsizemask = streambufsize - 1 -- faster modulo
 233 |     local streambufs = {}
 234 |     local streambufoffset = 0
 235 |     local streamstartbuffer = 0
 236 |     local streambufferscount = 0
 237 |     local level = -1
 238 | 
 239 |     local function deletestreambuffers()
 240 |         local min = s
 241 |         for i = stackptr - 1, 0, -1 do
 242 |             local val = STACK[i].s
 243 |             if val >= 0 then
 244 |                 min = math.min(val, min)
 245 |             end
 246 |         end
 247 | 
 248 |         for i = captop - 1, 0, -1 do
 249 |             local val = CAPTURE[i].s
 250 |             if val >= 0 then
 251 |                 min = math.min(val, min)
 252 |             end
 253 |         end
 254 |         for i = streamstartbuffer + 1, streambufoffset - streambufsize, streambufsize do
 255 |             -- max behind for full capture and max behind for Look-behind predicate
 256 |             if i + streambufsize + MAXBEHIND < min then
 257 |                 streambufs[i] = nil
 258 |                 streambufferscount = streambufferscount - 1
 259 |             else
 260 |                 streamstartbuffer = i - 1
 261 |                 break
 262 |             end
 263 |         end
 264 |     end
 265 | 
 266 |     local function addstreamdata(s, last)
 267 |         local len = #s
 268 |         local srcoffset = 0
 269 |         if streambufferscount > 128 then
 270 |             deletestreambuffers()
 271 |         end
 272 |         repeat
 273 |             local offset = bit.band(streambufoffset, streambufsizemask)
 274 |             if offset > 0 then
 275 |                 local index = streambufoffset - offset + 1
 276 |                 local count = math.min(len, streambufsize - offset)
 277 |                 ffi.copy(streambufs[index] + offset, s:sub(srcoffset + 1, srcoffset + 1 + count), count)
 278 |                 len = len - count
 279 |                 srcoffset = srcoffset + count
 280 |                 streambufoffset = streambufoffset + count
 281 |             end
 282 |             if len > 0 then
 283 |                 local index = streambufoffset - (bit.band(streambufoffset, streambufsizemask)) + 1
 284 |                 local buf = ffi.new('unsigned char[?]', streambufsize)
 285 |                 streambufferscount = streambufferscount + 1
 286 |                 streambufs[index] = buf
 287 |                 local count = math.min(len, streambufsize)
 288 |                 ffi.copy(buf, s:sub(srcoffset + 1, srcoffset + 1 + count), count)
 289 |                 len = len - count
 290 |                 srcoffset = srcoffset + count
 291 |                 streambufoffset = streambufoffset + count
 292 |             end
 293 |             if streambufoffset >= 2 ^ 47 then
 294 |                 error("too big input stream", 0)
 295 |             end
 296 |         until len == 0
 297 |     end
 298 | 
 299 |     local function getstreamchar(s)
 300 |         local offset = bit.band(s, streambufsizemask)
 301 |         local index = s - offset + 1
 302 |         return streambufs[index][offset]
 303 |     end
 304 | 
 305 |     local checkstreamlen
 306 | 
 307 |     local function getstreamstring(st, en)
 308 |         -- TODO Optimalize access
 309 |         local str = {}
 310 |         local i = st >= 0 and st or 1
 311 |         local to = en >= 0 and en or math.huge
 312 |         while true do
 313 |             if i > to then break end
 314 |             if not checkstreamlen(i - 1) then return end
 315 |             if last and (st < 0 or en < 0) then
 316 |                 for j = i, streambufoffset do
 317 |                     str[#str + 1] = string.char(getstreamchar(j - 1))
 318 |                 end
 319 |                 en = en < 0 and streambufoffset + en + 1 or en
 320 |                 en = st > 0 and en - st + 1 or en
 321 |                 st = st < 0 and streambufoffset + st + 1 or 1
 322 |                 return table.concat(str):sub(st, en)
 323 |             else
 324 |                 str[#str + 1] = string.char(getstreamchar(i - 1))
 325 |                 i = i + 1
 326 |             end
 327 |         end
 328 |         return table.concat(str)
 329 |     end
 330 | 
 331 |     function checkstreamlen(index)
 332 |         local str
 333 |         while true do
 334 |             if index < streambufoffset then
 335 |                 return true
 336 |             else
 337 |                 if last then
 338 |                     s = streambufoffset
 339 |                     return false
 340 |                 end
 341 |                 local max = captop
 342 |                 for i = stackptr - 1, 0, -1 do
 343 |                     local val = STACK[i].X == CHOICE and STACK[i].caplevel or -1
 344 |                     if val >= 0 then
 345 |                         max = math.min(val, max)
 346 |                     end
 347 |                 end
 348 |                 local n, out, outindex = lpcap.getcapturesruntime(CAPTURE, nil, getstreamstring, false, 0, max, captop, valuetable, unpack(arg, 1, argcount))
 349 |                 if n > 0 then
 350 |                     for i = stackptr - 1, 0, -1 do
 351 |                         local val = STACK[i].caplevel
 352 |                         if val > 0 then
 353 |                             STACK[i].caplevel = STACK[i].caplevel - n
 354 |                         end
 355 |                     end
 356 |                     captop = captop - n
 357 |                 end
 358 |                 if outindex > 0 then
 359 |                     nocapturereleased = false
 360 |                 end
 361 |                 str, last = coroutine.yield(1, unpack(out, 1, outindex))
 362 |                 addstreamdata(str)
 363 |             end
 364 |         end
 365 |     end
 366 | 
 367 |     local function doublecapture()
 368 |         maxcapture = maxcapture * 2
 369 |         local NEWCAPTURE = ffi.new("CAPTURE[?]", maxcapture)
 370 |         ffi.copy(NEWCAPTURE, CAPTURE, ffi.sizeof('CAPTURE') * captop)
 371 |         CAPTURE = NEWCAPTURE
 372 |         CAPTURESTACK[capturestackptr].capture = CAPTURE
 373 |         CAPTURESTACK[capturestackptr].maxcapture = maxcapture
 374 |     end
 375 | 
 376 |     local function pushcapture()
 377 |         CAPTURE[captop].idx = op.p[p].offset
 378 |         CAPTURE[captop].kind = band(op.p[p].val, 0x0f)
 379 |         CAPTURE[captop].candelete = band(op.p[p].val, BCapcandelete) ~= 0 and 1 or 0
 380 |         captop = captop + 1
 381 |         p = p + 1
 382 |         if captop >= maxcapture then
 383 |             doublecapture()
 384 |         end
 385 |     end
 386 | 
 387 |     local function traceenter(typ, par)
 388 |         level = level + (par or 0)
 389 |         io.write(('%s+%s %s\n'):format((' '):rep(level), typ, valuetable[op.p[p].aux]))
 390 |     end
 391 | 
 392 |     local function traceleave(inst)
 393 |         io.write(('%s- %s\n'):format((' '):rep(level), valuetable[op.p[inst].aux]))
 394 |         level = level - 1
 395 |     end
 396 | 
 397 |     local function tracematch(typ, start, par, from, to, inst, extra, ...)
 398 |         local n, caps, capscount = lpcap.getcapturesruntime(CAPTURE, o, getstreamstring, true, start, captop, captop, valuetable, ...)
 399 |         local capstr = {}
 400 |         for i = 1, capscount do capstr[i] = tostring(caps[i]) end
 401 |         extra = extra and '(' .. extra .. ')' or ''
 402 |         io.write(('%s=%s %s%s %s %s \n'):format((' '):rep(level), typ, valuetable[op.p[inst].aux], extra,
 403 |             o and o:sub(from, to) or getstreamstring(from, to), table.concat(capstr, " ")))
 404 |         level = level - par
 405 |     end
 406 | 
 407 |     local function fail()
 408 |         -- pattern failed: try to backtrack
 409 |         local X
 410 |         repeat -- remove pending calls
 411 |             stackptr = stackptr - 1
 412 |             if stackptr == -1 then
 413 |                 p = FAIL
 414 |                 return
 415 |             end
 416 |             s = STACK[stackptr].s
 417 |             X = STACK[stackptr].X
 418 |             if usememoization and X == CALL and STACK[stackptr].memos ~= VOID then
 419 |                 Memo1[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = FAIL
 420 |                 Memo2[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = FAIL
 421 |             end
 422 |             -- lvar.2 rest
 423 |             if X == LRFAIL then
 424 |                 CAPTURESTACK[capturestackptr] = nil
 425 |                 capturestackptr = capturestackptr - 1
 426 |                 CAPTURE = CAPTURESTACK[capturestackptr].capture
 427 |                 maxcapture = CAPTURESTACK[capturestackptr].maxcapture
 428 |                 L[STACK[stackptr].pA + s * maxpointer] = nil
 429 |             end
 430 |             if trace and (X == CALL or X == LRFAIL) then traceleave(STACK[stackptr].p - 1) end
 431 |         until X == CHOICE or X >= 0
 432 |         p = STACK[stackptr].p
 433 |         for i = #valuetable, STACK[stackptr].valuetabletop + 1, -1 do
 434 |             table.remove(valuetable)
 435 |         end
 436 |         -- inc.2
 437 |         if X >= 0 then
 438 |             s = X
 439 |             capturestackptr = capturestackptr - 1
 440 |             CAPTURE = CAPTURESTACK[capturestackptr].capture
 441 |             captop = CAPTURESTACK[capturestackptr].captop
 442 |             maxcapture = CAPTURESTACK[capturestackptr].maxcapture
 443 |             local capture = L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer].capturecommit
 444 |             while captop + capture.captop >= maxcapture do
 445 |                 doublecapture()
 446 |             end
 447 |             ffi.copy(CAPTURE + captop, capture.capture, capture.captop * ffi.sizeof('CAPTURE'))
 448 |             captop = captop + capture.captop
 449 |             if trace then tracematch('', captop - capture.captop, 1, STACK[stackptr].s + 1, s, STACK[stackptr].p - 1, L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer].level, unpack(arg, 1, argcount)) end
 450 |             CAPTURESTACK[capturestackptr + 1] = nil
 451 |             L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer] = nil
 452 |         else
 453 |             captop = STACK[stackptr].caplevel
 454 |         end
 455 |     end
 456 | 
 457 |     local function doublestack()
 458 |         if stackptr >= maxstack then
 459 |             error(("backtrack stack overflow (current limit is %d)"):format(maxstack), 0)
 460 |         end
 461 |         stacklimit = stacklimit * 2
 462 |         stacklimit = (stacklimit > maxstack) and maxstack or stacklimit
 463 |         local NEWSTACK = ffi.new("STACK[?]", stacklimit)
 464 |         ffi.copy(NEWSTACK, STACK, ffi.sizeof('STACK') * stackptr)
 465 |         STACK = NEWSTACK
 466 |     end
 467 | 
 468 |     if stream then
 469 |         addstreamdata(o)
 470 |         len = nil
 471 |         o = nil
 472 |         ptr = nil
 473 |     end
 474 |     while true do
 475 |         --[[ Only for debug
 476 |         io.write(("s: |%s| stck:%d, caps:%d  \n"):format(s + 1, stackptr, captop))
 477 |         if p ~= FAIL then
 478 |             lpprint.printinst(op.p, p, valuetable)
 479 |             lpprint.printcaplist(CAPTURE, captop, valuetable)
 480 |         end
 481 |         --]]
 482 |         if p == FAIL then return -1 end
 483 |         local code = op.p[p].code
 484 |         if code == IEnd then
 485 |             CAPTURE[captop].kind = Cclose
 486 |             CAPTURE[captop].s = -1
 487 |             return 0, lpcap.getcaptures(CAPTURE, o, getstreamstring, nocapturereleased and s + 1, valuetable, ...)
 488 |         elseif code == IRet then
 489 |             if STACK[stackptr - 1].X == CALL then
 490 |                 stackptr = stackptr - 1
 491 |                 if trace then tracematch('', STACK[stackptr].caplevel, 1, STACK[stackptr].s + 1, s, STACK[stackptr].p - 1, nil, ...) end
 492 |                 p = STACK[stackptr].p
 493 |                 if usememoization and STACK[stackptr].memos ~= VOID then
 494 |                     local dif = captop - STACK[stackptr].caplevel
 495 |                     local caps
 496 |                     if dif > 0 then
 497 |                         caps = ffi.new("CAPTURE[?]", dif)
 498 |                         ffi.copy(caps, CAPTURE + captop - dif, dif * ffi.sizeof('CAPTURE'))
 499 |                     end
 500 |                     local val = { s, dif, caps }
 501 |                     Memo1[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = val
 502 |                     Memo2[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = val
 503 |                 end
 504 |             else
 505 |                 local X = STACK[stackptr - 1].X
 506 |                 -- lvar.1 inc.1
 507 |                 if X == LRFAIL or s > X then
 508 |                     if trace then tracematch('IB', 0, 0, STACK[stackptr - 1].s + 1, s, STACK[stackptr - 1].p - 1, L[STACK[stackptr - 1].pA + STACK[stackptr - 1].s * maxpointer].level + 1, ...) end
 509 |                     STACK[stackptr - 1].X = s
 510 |                     p = STACK[stackptr - 1].pA
 511 |                     s = STACK[stackptr - 1].s
 512 |                     local lambda = L[p + s * maxpointer]
 513 |                     lambda.level = lambda.level + 1
 514 |                     lambda.X = STACK[stackptr - 1].X
 515 |                     STACK[stackptr - 1].caplevel = captop
 516 |                     STACK[stackptr - 1].valuetabletop = #valuetable
 517 |                     CAPTURESTACK[capturestackptr].captop = captop
 518 |                     lambda.capturecommit = CAPTURESTACK[capturestackptr]
 519 |                     captop = 0
 520 |                     CAPTURE = ffi.new("CAPTURE[?]", maxcapturedefault)
 521 |                     CAPTURESTACK[capturestackptr] = { capture = CAPTURE, captop = captop, maxcapture = maxcapturedefault }
 522 |                     maxcapture = maxcapturedefault
 523 |                 else
 524 |                     -- inc.3
 525 |                     stackptr = stackptr - 1
 526 |                     p = STACK[stackptr].p
 527 |                     s = STACK[stackptr].X
 528 |                     for i = #valuetable, STACK[stackptr].valuetabletop + 1, -1 do
 529 |                         table.remove(valuetable)
 530 |                     end
 531 |                     local lambda = L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer]
 532 |                     capturestackptr = capturestackptr - 1
 533 |                     CAPTURE = CAPTURESTACK[capturestackptr].capture
 534 |                     captop = CAPTURESTACK[capturestackptr].captop
 535 |                     maxcapture = CAPTURESTACK[capturestackptr].maxcapture
 536 |                     local capture = lambda.capturecommit
 537 |                     while captop + capture.captop >= maxcapture do
 538 |                         doublecapture()
 539 |                     end
 540 |                     ffi.copy(CAPTURE + captop, capture.capture, capture.captop * ffi.sizeof('CAPTURE'))
 541 |                     captop = captop + capture.captop
 542 |                     if trace then tracematch('', captop - capture.captop, 1, STACK[stackptr].s + 1, s, STACK[stackptr].p - 1, L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer].level, ...) end
 543 |                     CAPTURESTACK[capturestackptr + 1] = nil
 544 |                     L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer] = nil
 545 |                 end
 546 |             end
 547 |         elseif code == IBehind then
 548 |             local n = op.p[p].val
 549 |             if n > s then
 550 |                 fail()
 551 |             else
 552 |                 s = s - n
 553 |                 p = p + 1
 554 |             end
 555 |         elseif code == IJmp then
 556 |             if trace and op.p[p].aux ~= 0 then traceenter('TC') end
 557 |             p = p + op.p[p].offset
 558 |         elseif code == IChoice then
 559 |             if stackptr == stacklimit then
 560 |                 doublestack()
 561 |             end
 562 |             STACK[stackptr].X = CHOICE
 563 |             STACK[stackptr].p = p + op.p[p].offset
 564 |             STACK[stackptr].s = s
 565 |             STACK[stackptr].caplevel = captop
 566 |             STACK[stackptr].valuetabletop = #valuetable
 567 |             stackptr = stackptr + 1
 568 |             p = p + 1
 569 |         elseif code == ICall then
 570 |             if stackptr == stacklimit then
 571 |                 doublestack()
 572 |             end
 573 |             local k = bit.band(op.p[p].val, 0xffff)
 574 |             if k == 0 then
 575 |                 local pA = p + op.p[p].offset
 576 |                 local memo = Memo1[pA + s * maxpointer]
 577 |                 if usememoization and memo then
 578 |                     if trace then traceenter('M', 1) end
 579 |                     if memo == FAIL then
 580 |                         if trace then traceleave(p) end
 581 |                         fail()
 582 |                     else
 583 |                         local dif = memo[2]
 584 |                         if dif > 0 then
 585 |                             while captop + dif >= maxcapture do
 586 |                                 doublecapture()
 587 |                             end
 588 |                             local caps = memo[3]
 589 |                             ffi.copy(CAPTURE + captop, caps, dif * ffi.sizeof('CAPTURE'))
 590 |                             captop = captop + dif
 591 |                         end
 592 |                         if trace then tracematch('M', captop - dif, 1, s + 1, memo[1], p, nil, ...) end
 593 |                         s = memo[1]
 594 |                         p = p + 1
 595 |                     end
 596 |                 else
 597 |                     if trace then traceenter('', 1) end
 598 |                     STACK[stackptr].X = CALL
 599 |                     STACK[stackptr].s = s
 600 |                     STACK[stackptr].p = p + 1 -- save return address
 601 |                     STACK[stackptr].pA = pA
 602 |                     STACK[stackptr].memos = s
 603 |                     STACK[stackptr].caplevel = captop
 604 |                     stackptr = stackptr + 1
 605 |                     p = pA
 606 |                     if usememoization and not memo then
 607 |                         memoind = memoind + 1
 608 |                         if memoind > maxmemo then
 609 |                             memoind = 0
 610 |                             Memo1 = Memo2
 611 |                             Memo2 = {}
 612 |                         end
 613 |                     end
 614 |                 end
 615 |             else
 616 |                 local pA = p + op.p[p].offset
 617 |                 local X = L[pA + s * maxpointer]
 618 |                 -- lvar.1 lvar.2
 619 |                 if not X then
 620 |                     if trace then traceenter('', 1) end
 621 |                     CAPTURESTACK[capturestackptr].captop = captop
 622 |                     local capture = ffi.new("CAPTURE[?]", maxcapturedefault)
 623 |                     capturestackptr = capturestackptr + 1
 624 |                     CAPTURESTACK[capturestackptr] = { capture = capture, captop = captop, maxcapture = maxcapturedefault }
 625 |                     CAPTURE = capture
 626 |                     maxcapture = maxcapturedefault
 627 |                     captop = 0
 628 |                     L[pA + s * maxpointer] = { X = LRFAIL, k = k, cs = capturestackptr, level = 0 }
 629 |                     STACK[stackptr].p = p + 1
 630 |                     STACK[stackptr].pA = pA
 631 |                     STACK[stackptr].s = s
 632 |                     STACK[stackptr].X = LRFAIL
 633 |                     stackptr = stackptr + 1
 634 |                     p = pA
 635 |                 elseif X.X == LRFAIL or k < X.k then
 636 |                     -- lvar.3 lvar.5
 637 |                     fail()
 638 |                 else
 639 |                     -- lvar.4
 640 |                     local capture = X.capturecommit
 641 |                     while captop + capture.captop >= maxcapture do
 642 |                         doublecapture()
 643 |                     end
 644 |                     ffi.copy(CAPTURE + captop, capture.capture, capture.captop * ffi.sizeof('CAPTURE'))
 645 |                     captop = captop + capture.captop
 646 |                     p = p + 1
 647 |                     s = X.X
 648 |                 end
 649 |             end
 650 |         elseif code == ICommit then
 651 |             stackptr = stackptr - 1
 652 |             p = p + op.p[p].offset
 653 |         elseif code == IPartialCommit then
 654 |             STACK[stackptr - 1].s = s
 655 |             STACK[stackptr - 1].caplevel = captop
 656 |             STACK[stackptr - 1].valuetabletop = #valuetable
 657 |             p = p + op.p[p].offset
 658 |         elseif code == IBackCommit then
 659 |             stackptr = stackptr - 1
 660 |             s = STACK[stackptr].s
 661 |             captop = STACK[stackptr].caplevel
 662 |             for i = #valuetable, STACK[stackptr].valuetabletop + 1, -1 do
 663 |                 table.remove(valuetable)
 664 |             end
 665 |             p = p + op.p[p].offset
 666 |         elseif code == IFailTwice then
 667 |             stackptr = stackptr - 1
 668 |             fail()
 669 |         elseif code == IFail then
 670 |             fail()
 671 |         elseif code == ICloseRunTime then
 672 |             -- invalidate memo
 673 |             for i = 0, stackptr - 1 do
 674 |                 STACK[i].memos = VOID
 675 |             end
 676 |             local cs = {}
 677 |             cs.s = o
 678 |             cs.stream = getstreamstring
 679 |             cs.ocap = CAPTURE
 680 |             cs.ptop = arg
 681 |             cs.ptopcount = argcount
 682 |             local out = { outindex = 0, out = {} }
 683 |             local n = lpcap.runtimecap(cs, captop, s + 1, out, valuetable) -- call function
 684 |             captop = captop - n
 685 |             local res = resdyncaptures(out.out[1], s + 1, len and len + 1, checkstreamlen) -- get result
 686 |             -- fail?
 687 |             if res == FAIL then
 688 |                 fail()
 689 |             else
 690 |                 s = res - 1 -- else update current position
 691 |                 n = out.outindex - 1 -- number of new captures
 692 |                 -- any new capture?
 693 |                 if n > 0 then
 694 |                     captop = captop + 1
 695 |                     while captop + n + 1 >= maxcapture do
 696 |                         doublecapture()
 697 |                     end
 698 |                     captop = captop + n + 1
 699 |                     -- add new captures to 'capture' list
 700 |                     adddyncaptures(s + 1, CAPTURE, captop - n - 2, n, out.out, valuetable)
 701 |                 end
 702 |                 p = p + 1
 703 |             end
 704 |         elseif code == ICloseCapture then
 705 |             local s1 = s + 1
 706 |             assert(captop > 0)
 707 |             -- if possible, turn capture into a full capture
 708 |             if CAPTURE[captop - 1].siz == 0 and
 709 |                     s1 - CAPTURE[captop - 1].s < 255 then
 710 |                 CAPTURE[captop - 1].siz = s1 - CAPTURE[captop - 1].s + 1
 711 |                 p = p + 1
 712 |             else
 713 |                 CAPTURE[captop].siz = 1
 714 |                 CAPTURE[captop].s = s + 1
 715 |                 pushcapture()
 716 |             end
 717 |         elseif code == IOpenCapture then
 718 |             CAPTURE[captop].siz = 0
 719 |             CAPTURE[captop].s = s + 1
 720 |             pushcapture()
 721 |         elseif code == IFullCapture then
 722 |             CAPTURE[captop].siz = band(rshift(op.p[p].val, 4), 0x0F) + 1 -- save capture size
 723 |             CAPTURE[captop].s = s + 1 - band(rshift(op.p[p].val, 4), 0x0F)
 724 |             pushcapture()
 725 |             -- standard mode
 726 |         elseif o then
 727 |             if code == IAny then
 728 |                 if s < len then
 729 |                     p = p + 1
 730 |                     s = s + 1
 731 |                 else
 732 |                     fail()
 733 |                 end
 734 |             elseif code == ITestAny then
 735 |                 if s < len then
 736 |                     p = p + 1
 737 |                 else
 738 |                     p = p + op.p[p].offset
 739 |                 end
 740 |             elseif code == IChar then
 741 |                 if s < len and ptr[s] == op.p[p].val then
 742 |                     p = p + 1
 743 |                     s = s + 1
 744 |                 else
 745 |                     fail()
 746 |                 end
 747 |             elseif code == ITestChar then
 748 |                 if s < len and ptr[s] == op.p[p].val then
 749 |                     p = p + 1
 750 |                 else
 751 |                     p = p + op.p[p].offset
 752 |                 end
 753 |             elseif code == ISet then
 754 |                 local c = ptr[s]
 755 |                 local set = valuetable[op.p[p].val]
 756 |                 if s < len and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then
 757 |                     p = p + 1
 758 |                     s = s + 1
 759 |                 else
 760 |                     fail()
 761 |                 end
 762 |             elseif code == ITestSet then
 763 |                 local c = ptr[s]
 764 |                 local set = valuetable[op.p[p].val]
 765 |                 if s < len and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then
 766 |                     p = p + 1
 767 |                 else
 768 |                     p = p + op.p[p].offset
 769 |                 end
 770 |             elseif code == ISpan then
 771 |                 while s < len do
 772 |                     local c = ptr[s]
 773 |                     local set = valuetable[op.p[p].val]
 774 |                     if band(set[rshift(c, 5)], lshift(1, band(c, 31))) == 0 then
 775 |                         break
 776 |                     end
 777 |                     s = s + 1
 778 |                 end
 779 |                 p = p + 1
 780 |             end
 781 |         else
 782 |             -- stream mode
 783 |             if code == IAny then
 784 |                 if checkstreamlen(s) then
 785 |                     p = p + 1
 786 |                     s = s + 1
 787 |                 else
 788 |                     fail()
 789 |                 end
 790 |             elseif code == ITestAny then
 791 |                 if checkstreamlen(s) then
 792 |                     p = p + 1
 793 |                 else
 794 |                     p = p + op.p[p].offset
 795 |                 end
 796 |             elseif code == IChar then
 797 |                 if checkstreamlen(s) and getstreamchar(s) == op.p[p].val then
 798 |                     p = p + 1
 799 |                     s = s + 1
 800 |                 else
 801 |                     fail()
 802 |                 end
 803 |             elseif code == ITestChar then
 804 |                 if checkstreamlen(s) and getstreamchar(s) == op.p[p].val then
 805 |                     p = p + 1
 806 |                 else
 807 |                     p = p + op.p[p].offset
 808 |                 end
 809 |             elseif code == ISet then
 810 |                 local c = checkstreamlen(s) and getstreamchar(s)
 811 |                 local set = valuetable[op.p[p].val]
 812 |                 if c and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then
 813 |                     p = p + 1
 814 |                     s = s + 1
 815 |                 else
 816 |                     fail()
 817 |                 end
 818 |             elseif code == ITestSet then
 819 |                 local c = checkstreamlen(s) and getstreamchar(s)
 820 |                 local set = valuetable[op.p[p].val]
 821 |                 if c and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then
 822 |                     p = p + 1
 823 |                 else
 824 |                     p = p + op.p[p].offset
 825 |                 end
 826 |             elseif code == ISpan then
 827 |                 while checkstreamlen(s) do
 828 |                     local c = getstreamchar(s)
 829 |                     local set = valuetable[op.p[p].val]
 830 |                     if band(set[rshift(c, 5)], lshift(1, band(c, 31))) == 0 then
 831 |                         break
 832 |                     end
 833 |                     s = s + 1
 834 |                 end
 835 |                 p = p + 1
 836 |             end
 837 |         end
 838 |     end
 839 | end
 840 | 
 841 | local function setmax(val)
 842 |     maxstack = val
 843 |     if maxstack < INITBACK then
 844 |         maxstack = INITBACK
 845 |     end
 846 | end
 847 | 
 848 | local function setmaxbehind(val)
 849 |     MAXBEHIND = math.max(MAXBEHINDPREDICATE, MAXOFF, val or 0)
 850 | end
 851 | 
 852 | local function enablememoization(val)
 853 |     usememoization = val
 854 | end
 855 | 
 856 | local function enabletracing(val)
 857 |     trace = val
 858 | end
 859 | 
 860 | -- Get the initial position for the match, interpreting negative
 861 | -- values from the end of the subject
 862 | 
 863 | local function initposition(len, pos)
 864 |     local ii = pos or 1
 865 |     -- positive index?
 866 |     if (ii > 0) then
 867 |         -- inside the string?
 868 |         if ii <= len then
 869 |             return ii - 1; -- return it (corrected to 0-base)
 870 |         else
 871 |             return len; -- crop at the end
 872 |         end
 873 |     else
 874 |         -- negative index
 875 |         -- inside the string?
 876 |         if -ii <= len then
 877 |             return len + ii -- return position from the end
 878 |         else
 879 |             return 0; -- crop at the beginning
 880 |         end
 881 |     end
 882 | end
 883 | 
 884 | local function lp_match(pat, s, init, valuetable, ...)
 885 |     local i = initposition(s:len(), init) + 1
 886 |     return select(2, match(false, true, s, i, pat.code, valuetable, ...))
 887 | end
 888 | 
 889 | local function lp_streammatch(pat, init, valuetable, ...)
 890 |     local params = { ... }
 891 |     local paramslength = select('#', ...)
 892 |     local fce = coroutine.wrap(function(s, last)
 893 |         return match(true, last, s, init or 1, pat.code, valuetable, unpack(params, 1, paramslength))
 894 |     end)
 895 |     return fce
 896 | end
 897 | 
 898 | local function retcount(...)
 899 |     return select('#', ...), { ... }
 900 | end
 901 | 
 902 | -- Only for testing purpose
 903 | -- stream emulation (send all chars from string one char after char)
 904 | local function lp_emulatestreammatch(pat, s, init, valuetable, ...)
 905 |     local init = initposition(s:len(), init) + 1
 906 |     local fce = lp_streammatch(pat, init, valuetable, ...)
 907 |     local ret, count = {}, 0
 908 |     for j = 1, #s do
 909 |         local pcount, pret = retcount(fce(s:sub(j, j), j == #s)) -- one char
 910 |         if pret[1] == -1 then
 911 |             return -- fail
 912 |         elseif pret[1] == 0 then
 913 |             -- parsing finished
 914 |             -- collect result
 915 |             for i = 2, pcount do
 916 |                 ret[count + i - 1] = pret[i]
 917 |             end
 918 |             count = count + pcount - 1
 919 |             return unpack(ret, 1, count)
 920 |         end
 921 |         for i = 2, pcount do
 922 |             ret[count + i - 1] = pret[i]
 923 |         end
 924 |         count = count + pcount - 1
 925 |     end
 926 |     return select(2, fce(s, true)) -- empty string
 927 | end
 928 | 
 929 | local function lp_load(str, fcetab, usemeta)
 930 |     local index = 0
 931 |     assert(str)
 932 |     local ptr = ffi.cast('const char*', str)
 933 |     local patsize = ffi.cast('uint32_t*', ptr + index)[0]
 934 |     index = index + 4
 935 |     local len = ffi.sizeof(treepatternelement) * patsize
 936 | 
 937 |     local pat
 938 |     if usemeta then
 939 |         pat = treepattern(patsize)
 940 |     else
 941 |         pat = ffi.gc(ffi.cast('TREEPATTERN*', ffi.C.malloc(ffi.sizeof(treepattern, patsize))),
 942 |             function(ct)
 943 |                 if ct.code ~= nil then
 944 |                     ffi.C.free(ct.code.p)
 945 |                     ffi.C.free(ct.code)
 946 |                 end
 947 |                 ffi.C.free(ct)
 948 |             end)
 949 |         ffi.fill(pat, ffi.sizeof(treepattern, patsize))
 950 |         pat.treesize = patsize
 951 |         pat.id = 0
 952 |     end
 953 |     ffi.copy(pat.p, ptr + index, len)
 954 |     index = index + len
 955 |     if usemeta then
 956 |         pat.code = pattern()
 957 |     else
 958 |         pat.code = ffi.cast('PATTERN*', ffi.C.malloc(ffi.sizeof(pattern)))
 959 |         assert(pat.code ~= nil)
 960 |         pat.code.allocsize = 10
 961 |         pat.code.size = 0
 962 |         pat.code.p = ffi.C.malloc(ffi.sizeof(patternelement) * pat.code.allocsize)
 963 |         assert(pat.code.p ~= nil)
 964 |         ffi.fill(pat.code.p, ffi.sizeof(patternelement) * pat.code.allocsize)
 965 |     end
 966 |     pat.code.size = ffi.cast('uint32_t*', ptr + index)[0]
 967 |     index = index + 4
 968 |     local len = pat.code.size * ffi.sizeof(patternelement)
 969 |     local data = ffi.string(ptr + index, len)
 970 |     index = index + len
 971 |     local count = ffi.cast('uint32_t*', ptr + index)[0]
 972 |     index = index + 4
 973 |     local valuetable = {}
 974 |     for i = 1, count do
 975 |         local tag = ffi.string(ptr + index, 3)
 976 |         index = index + 3
 977 |         --string
 978 |         if tag == 'str' then
 979 |             local len = ffi.cast('uint32_t*', ptr + index)[0]
 980 |             index = index + 4
 981 |             local val = ffi.string(ptr + index, len)
 982 |             index = index + len
 983 |             valuetable[#valuetable + 1] = val
 984 |         elseif tag == 'num' then
 985 |             --number
 986 |             local len = ffi.cast('uint32_t*', ptr + index)[0]
 987 |             index = index + 4
 988 |             local val = ffi.string(ptr + index, len)
 989 |             index = index + len
 990 |             valuetable[#valuetable + 1] = tonumber(val)
 991 |         elseif tag == 'cdt' then
 992 |             --ctype
 993 |             local val = settype()
 994 |             ffi.copy(val, ptr + index, ffi.sizeof(settype))
 995 |             index = index + ffi.sizeof(settype)
 996 |             valuetable[#valuetable + 1] = val
 997 |         elseif tag == 'fnc' then
 998 |             --function
 999 |             local len = ffi.cast('uint32_t*', ptr + index)[0]
1000 |             index = index + 4
1001 |             local fname = ffi.string(ptr + index, len)
1002 |             index = index + len
1003 |             len = ffi.cast('uint32_t*', ptr + index)[0]
1004 |             index = index + 4
1005 |             local val = ffi.string(ptr + index, len)
1006 |             index = index + len
1007 |             if fcetab and fcetab[fname] then
1008 |                 assert(type(fcetab[fname]) == 'function', ('"%s" is not function'):format(fname))
1009 |                 valuetable[#valuetable + 1] = fcetab[fname]
1010 |             else
1011 |                 valuetable[#valuetable + 1] = loadstring(val)
1012 |             end
1013 |         end
1014 |     end
1015 |     pat.code.allocsize = pat.code.size
1016 |     pat.code.p = ffi.C.realloc(pat.code.p, ffi.sizeof(patternelement) * pat.code.allocsize)
1017 |     assert(pat.code.p ~= nil)
1018 |     ffi.copy(pat.code.p, data, ffi.sizeof(patternelement) * pat.code.allocsize)
1019 |     return pat, valuetable
1020 | end
1021 | 
1022 | local function lp_loadfile(fname, fcetab, usemeta)
1023 |     local file = assert(io.open(fname, 'rb'))
1024 |     local pat, valuetable = lp_load(assert(file:read("*a")), fcetab, usemeta)
1025 |     file:close()
1026 |     return pat, valuetable
1027 | end
1028 | 
1029 | -- ======================================================
1030 | 
1031 | return {
1032 |     match = lp_match,
1033 |     streammatch = lp_streammatch,
1034 |     emulatestreammatch = lp_emulatestreammatch,
1035 |     load = lp_load,
1036 |     loadfile = lp_loadfile,
1037 |     setmax = setmax,
1038 |     setmaxbehind = setmaxbehind,
1039 |     enablememoization = enablememoization,
1040 |     enabletracing = enabletracing
1041 | }
1042 | 


--------------------------------------------------------------------------------
/src/re.lua:
--------------------------------------------------------------------------------
  1 | -- $Id: re.lua,v 1.44 2013/03/26 20:11:40 roberto Exp $
  2 | -- 2014/08/15 changes rostislav
  3 | 
  4 | -- imported functions and modules
  5 | local tonumber, print, error = tonumber, print, error
  6 | local setmetatable = setmetatable
  7 | local m = require"lpeglj"
  8 | 
  9 | -- 'm' will be used to parse expressions, and 'mm' will be used to
 10 | -- create expressions; that is, 're' runs on 'm', creating patterns
 11 | -- on 'mm'
 12 | local mm = m
 13 | 
 14 | -- pattern's metatable
 15 | local mt = getmetatable(mm.P(0))
 16 | mt = m.version() == "1.0.0.0LJ" and m or mt
 17 | 
 18 | 
 19 | 
 20 | -- No more global accesses after this point
 21 | local version = _VERSION
 22 | if version == "Lua 5.2" then _ENV = nil end
 23 | 
 24 | 
 25 | local any = m.P(1)
 26 | 
 27 | 
 28 | -- Pre-defined names
 29 | local Predef = { nl = m.P"\n" }
 30 | 
 31 | 
 32 | local mem
 33 | local fmem
 34 | local gmem
 35 | 
 36 | 
 37 | local function updatelocale ()
 38 |   mm.locale(Predef)
 39 |   Predef.a = Predef.alpha
 40 |   Predef.c = Predef.cntrl
 41 |   Predef.d = Predef.digit
 42 |   Predef.g = Predef.graph
 43 |   Predef.l = Predef.lower
 44 |   Predef.p = Predef.punct
 45 |   Predef.s = Predef.space
 46 |   Predef.u = Predef.upper
 47 |   Predef.w = Predef.alnum
 48 |   Predef.x = Predef.xdigit
 49 |   Predef.A = any - Predef.a
 50 |   Predef.C = any - Predef.c
 51 |   Predef.D = any - Predef.d
 52 |   Predef.G = any - Predef.g
 53 |   Predef.L = any - Predef.l
 54 |   Predef.P = any - Predef.p
 55 |   Predef.S = any - Predef.s
 56 |   Predef.U = any - Predef.u
 57 |   Predef.W = any - Predef.w
 58 |   Predef.X = any - Predef.x
 59 |   mem = {}    -- restart memoization
 60 |   fmem = {}
 61 |   gmem = {}
 62 |   local mt = {__mode = "v"}
 63 |   setmetatable(mem, mt)
 64 |   setmetatable(fmem, mt)
 65 |   setmetatable(gmem, mt)
 66 | end
 67 | 
 68 | 
 69 | updatelocale()
 70 | 
 71 | 
 72 | 
 73 | local I = m.P(function (s,i) print(i, s:sub(1, i-1)); return i end)
 74 | 
 75 | 
 76 | local function getdef (id, defs)
 77 |   local c = defs and defs[id]
 78 |   if not c then error("undefined name: " .. id) end
 79 |   return c
 80 | end
 81 | 
 82 | 
 83 | local function patt_error (s, i)
 84 |   local msg = (#s < i + 20) and s:sub(i)
 85 |                              or s:sub(i,i+20) .. "..."
 86 |   msg = ("pattern error near '%s'"):format(msg)
 87 |   error(msg, 2)
 88 | end
 89 | 
 90 | local function mult (p, n)
 91 |   local np = mm.P(true)
 92 |   while n >= 1 do
 93 |     if n%2 >= 1 then np = np * p end
 94 |     p = p * p
 95 |     n = n/2
 96 |   end
 97 |   return np
 98 | end
 99 | 
100 | local function equalcap (s, i, c)
101 |   if type(c) ~= "string" then return nil end
102 |   local e = #c + i
103 |   if type(s) == 'function' then  -- stream mode
104 |       if s(i, e - 1) == c then return e else return nil end
105 |   else
106 |       if s:sub(i, e - 1) == c then return e else return nil end
107 |   end
108 | end
109 | 
110 | 
111 | local S = (Predef.space + "--" * (any - Predef.nl)^0)^0
112 | 
113 | local name = m.R("AZ", "az", "__") * m.R("AZ", "az", "__", "09")^0
114 | 
115 | local arrow = S * "<-"
116 | 
117 | local seq_follow = m.P"/" + ")" + "}" + ":}" + "~}" + "|}" + (name * arrow) + -1
118 | 
119 | name = m.C(name)
120 | 
121 | 
122 | -- a defined name only have meaning in a given environment
123 | local Def = name * m.Carg(1)
124 | 
125 | local num = m.C(m.R"09"^1) * S / tonumber
126 | 
127 | local String = "'" * m.C((any - "'")^0) * "'" +
128 |                '"' * m.C((any - '"')^0) * '"'
129 | 
130 | 
131 | local defined = "%" * Def / function (c,Defs)
132 |   local cat =  Defs and Defs[c] or Predef[c]
133 |   if not cat then error ("name '" .. c .. "' undefined") end
134 |   return cat
135 | end
136 | 
137 | local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R
138 | 
139 | local item = defined + Range + m.C(any)
140 | 
141 | local Class =
142 |     "["
143 |   * (m.C(m.P"^"^-1))    -- optional complement symbol
144 |   * m.Cf(item * (item - "]")^0, mt.__add) /
145 |                           function (c, p) return c == "^" and any - p or p end
146 |   * "]"
147 | 
148 | local function adddef (t, k, exp)
149 |   if t[k] then
150 |     error("'"..k.."' already defined as a rule")
151 |   else
152 |     t[k] = exp
153 |   end
154 |   return t
155 | end
156 | 
157 | local function firstdef (n, r) return adddef({n}, n, r) end
158 | 
159 | 
160 | local function NT (n, b, p)
161 |   if not b then
162 |     error("rule '"..n.."' used outside a grammar")
163 |   else return mm.V(n, p or 0)
164 |   end
165 | end
166 | 
167 | 
168 | local exp = m.P{ "Exp",
169 |   Exp = S * ( m.V"Grammar"
170 |             + m.Cf(m.V"Seq" * ("/" * S * m.V"Seq")^0, mt.__add) );
171 |   Seq = m.Cf(m.Cc(m.P"") * m.V"Prefix"^0 , mt.__mul)
172 |         * (#seq_follow + patt_error);
173 |   Prefix = "&" * S * m.V"Prefix" / mt.__len
174 |          + "!" * S * m.V"Prefix" / mt.__unm
175 |          + m.V"Suffix";
176 |   Suffix = m.Cf(m.V"Primary" * S *
177 |           ( ( m.P"+" * m.Cc(1, mt.__pow)
178 |             + m.P"*" * m.Cc(0, mt.__pow)
179 |             + m.P"?" * m.Cc(-1, mt.__pow)
180 |             + "^" * ( m.Cg(num * m.Cc(mult))
181 |                     + m.Cg(m.C(m.S"+-" * m.R"09"^1) * m.Cc(mt.__pow))
182 |                     )
183 |             + "->" * S * ( m.Cg((String + num) * m.Cc(mt.__div))
184 |                          + m.P"{}" * m.Cc(nil, m.Ct)
185 |                          + m.Cg(Def / getdef * m.Cc(mt.__div))
186 |                          )
187 |             + "=>" * S * m.Cg(Def / getdef * m.Cc(m.Cmt))
188 |             ) * S
189 |           )^0, function (a,b,f) return f(a,b) end );
190 |   Primary = "(" * m.V"Exp" * ")"
191 |             + String / mm.P
192 |             + Class
193 |             + defined
194 |             + "{:" * (name * ":" + m.Cc(nil)) * m.V"Exp" * ":}" /
195 |                      function (n, p) return mm.Cg(p, n) end
196 |             + "=" * name / function (n) return mm.Cmt(mm.Cb(n), equalcap) end
197 |             + m.P"{}" / mm.Cp
198 |             + "{~" * m.V"Exp" * "~}" / mm.Cs
199 |             + "{|" * m.V"Exp" * "|}" / mm.Ct
200 |             + "{" * m.V"Exp" * "}" / mm.C
201 |             + m.P"." * m.Cc(any)
202 |             + (name * m.Cb("G") * (S * ":" * S * num)^-1 * -arrow + "<" * name * m.Cb("G") * (S * ":" * S * num)^-1 * ">") / NT;
203 |   Definition = name * arrow * m.V"Exp";
204 |   Grammar = m.Cg(m.Cc(true), "G") *
205 |             m.Cf(m.V"Definition" / firstdef * m.Cg(m.V"Definition")^0,
206 |               adddef) / mm.P
207 | }
208 | 
209 | local pattern = S * m.Cg(m.Cc(false), "G") * exp / mm.P * (-any + patt_error)
210 | 
211 | 
212 | local function compile (p, defs)
213 |   if mm.type(p) == "pattern" then return p end   -- already compiled
214 |   local cp = pattern:match(p, 1, defs)
215 |   if not cp then error("incorrect pattern", 3) end
216 |   return cp
217 | end
218 | 
219 | local function match (s, p, i)
220 |   local cp = mem[p]
221 |   if not cp then
222 |     cp = compile(p)
223 |     mem[p] = cp
224 |   end
225 |   return cp:match(s, i or 1)
226 | end
227 | 
228 | local function streammatch (p, i)
229 |     local cp = mem[p]
230 |     if not cp then
231 |         cp = compile(p)
232 |         mem[p] = cp
233 |     end
234 |     return cp:streammatch(i or 1)
235 | end
236 | 
237 | -- Only for testing purpose
238 | local function emulatestreammatch(s, p, i)
239 |     local cp = mem[p]
240 |     if not cp then
241 |         cp = compile(p)
242 |         mem[p] = cp
243 |     end
244 |     return cp:emulatestreammatch(s, i or 1)
245 | end
246 | 
247 | local function find (s, p, i)
248 |   local cp = fmem[p]
249 |   if not cp then
250 |     cp = compile(p) / 0
251 |     cp = mm.P{ mm.Cp() * cp * mm.Cp() + 1 * mm.V(1) }
252 |     fmem[p] = cp
253 |   end
254 |   local i, e = cp:match(s, i or 1)
255 |   if i then return i, e - 1
256 |   else return i
257 |   end
258 | end
259 | 
260 | local function gsub (s, p, rep)
261 |   local g = gmem[p] or {}   -- ensure gmem[p] is not collected while here
262 |   gmem[p] = g
263 |   local cp = g[rep]
264 |   if not cp then
265 |     cp = compile(p)
266 |     cp = mm.Cs((cp / rep + 1)^0)
267 |     g[rep] = cp
268 |   end
269 |   return cp:match(s)
270 | end
271 | 
272 | 
273 | -- exported names
274 | local re = {
275 |   compile = compile,
276 |   match = match,
277 |   streammatch = streammatch,
278 |   emulatestreammatch = emulatestreammatch,
279 |   find = find,
280 |   gsub = gsub,
281 |   updatelocale = updatelocale,
282 | }
283 | 
284 | if version == "Lua 5.1" then _G.re = re end
285 | 
286 | return re
287 | 


--------------------------------------------------------------------------------
/tests/loadtest.lua:
--------------------------------------------------------------------------------
 1 | local vm = require"lpvm"
 2 | local m = require"lpeglj"
 3 | local re = require"re"
 4 | 
 5 | local function checkeq(x, y, p)
 6 |     if p then print(x, y) end
 7 |     if type(x) ~= "table" then assert(x == y)
 8 |     else
 9 |         for k, v in pairs(x) do checkeq(v, y[k], p) end
10 |         for k, v in pairs(y) do checkeq(v, x[k], p) end
11 |     end
12 | end
13 | 
14 | print"Tests for LPegLJ pattern saving and loading"
15 | print("version " .. m.version())
16 | 
17 | local c = re.compile([[
18 |   s <-  ({(!longstring .)+} / longstring)*
19 |   longstring <- '[' {:init: '='* :} '[' close
20 |   close <- ']' =init ']' / . close
21 | ]])
22 | 
23 | local teststring = 'data1[=[insidedata1]=]data2[==[====]==]data3[[]]'
24 | 
25 | local patfile = 'test.pat'
26 | 
27 | local patdata = c:dump()
28 | c:save(patfile)
29 | 
30 | local pat = m.load(patdata)
31 | checkeq({ pat:match(teststring) }, { "data1", "data2", "data3" })
32 | 
33 | local pat = m.loadfile(patfile)
34 | checkeq({ pat:match(teststring) }, { "data1", "data2", "data3" })
35 | 
36 | -- use only vm module (lpvm + lpcap)
37 | local pat, valuetable = vm.load(patdata)
38 | checkeq({ vm.match(pat, teststring, 1, valuetable) }, { "data1", "data2", "data3" })
39 | 
40 | local pat, valuetable = vm.loadfile(patfile)
41 | checkeq({ vm.match(pat, teststring, 1, valuetable) }, { "data1", "data2", "data3" })
42 | 
43 | print('OK')
44 | 


--------------------------------------------------------------------------------
/tests/streamtest2.lua:
--------------------------------------------------------------------------------
  1 | local m = require"lpeglj"
  2 | local re = require"re"
  3 | 
  4 | local function checkeq(x, y, p)
  5 |     if p then print(x, y) end
  6 |     if type(x) ~= "table" then assert(x == y)
  7 |     else
  8 |         for k, v in pairs(x) do checkeq(v, y[k], p) end
  9 |         for k, v in pairs(y) do checkeq(v, x[k], p) end
 10 |     end
 11 | end
 12 | 
 13 | local ret
 14 | 
 15 | print"Tests for LPegLJ stream mode"
 16 | 
 17 | assert(type(m.version()) == "string")
 18 | print("version " .. m.version())
 19 | 
 20 | local pat = m.C('abcd') * m.C('x')
 21 | local fce = pat:streammatch()
 22 | 
 23 | ret = { fce("a") }
 24 | checkeq(ret, { 1 })
 25 | ret = { fce("b") }
 26 | checkeq(ret, { 1 })
 27 | ret = { fce("c") }
 28 | checkeq(ret, { 1 })
 29 | ret = { fce("d") }
 30 | checkeq(ret, { 1, "abcd" })
 31 | ret = { fce("x") }
 32 | checkeq(ret, { 0, 'x' })
 33 | 
 34 | local pat = m.C('abcd') * m.C('x') + m.C('abcd') * m.C('y')
 35 | local fce = pat:streammatch()
 36 | ret = { fce("abcd") }
 37 | checkeq(ret, { 1 })
 38 | ret = { fce("y") }
 39 | checkeq(ret, { 0, "abcd", "y" })
 40 | 
 41 | local pat = m.C('abcd') ^ 0 * m.C('x')
 42 | local fce = pat:streammatch()
 43 | for i = 1, 1e3 do
 44 |     ret = { fce("ab") }
 45 |     checkeq(ret, { 1 })
 46 |     ret = { fce("cd") }
 47 |     checkeq(ret, { 1, "abcd" })
 48 | end
 49 | ret = { fce("x") }
 50 | checkeq(ret, { 0, "x" })
 51 | 
 52 | local pat = (m.C('abcd') / "out") ^ 0 * m.C('x')
 53 | local fce = pat:streammatch()
 54 | for i = 1, 1e3 do
 55 |     ret = { fce("ab") }
 56 |     checkeq(ret, { 1 })
 57 |     ret = { fce("cd") }
 58 |     checkeq(ret, { 1, "out" })
 59 | end
 60 | ret = { fce("x") }
 61 | checkeq(ret, { 0, "x" })
 62 | 
 63 | local pat = (m.C('abcd') / "pattern1" + m.C('efgh') / "pattern2" + (m.P(1) - 'xyz')) ^ 0 * (m.C("xyz") / "pattern3")
 64 | local fce = pat:streammatch()
 65 | 
 66 | for i = 1, 1e3 do
 67 |     ret = { fce("ef") }
 68 |     checkeq(ret, { 1 })
 69 |     ret = { fce("gh") }
 70 |     checkeq(ret, { 1, "pattern2" })
 71 |     ret = { fce("a") }
 72 |     checkeq(ret, { 1 })
 73 |     ret = { fce("bcd") }
 74 |     checkeq(ret, { 1, "pattern1" })
 75 | end
 76 | ret = { fce("xyz") }
 77 | checkeq(ret, { 0, "pattern3" })
 78 | 
 79 | local pat = m.P('abcd') * -1
 80 | local fce = pat:streammatch()
 81 | ret = { fce("abc") }
 82 | checkeq(ret, { 1 })
 83 | ret = { fce("d") }
 84 | checkeq(ret, { 1 })
 85 | ret = { fce("", true) }
 86 | checkeq(ret, { 0, 5 })
 87 | 
 88 | local field = '"' * m.Cs(((m.P(1) - '"') + m.P'""' / '"') ^ 0) * '"' +
 89 |         m.C((1 - m.S',\n"') ^ 0)
 90 | 
 91 | local record = field * (',' * field) ^ 0 * (m.P'\n' + -1)
 92 | 
 93 | local fce = record:streammatch()
 94 | ret = { fce('ab') }
 95 | checkeq(ret, { 1 })
 96 | ret = { fce('c') }
 97 | checkeq(ret, { 1 })
 98 | ret = { fce(',"def",') }
 99 | checkeq(ret, { 1, 'abc', 'def' })
100 | ret = { fce('x', true) }
101 | checkeq(ret, { 0, 'x' })
102 | 
103 | record = re.compile[[
104 |   record <- field (',' field)*  (%nl / !.)
105 |   field <- escaped / nonescaped
106 |   nonescaped <- { [^,"%nl]* }
107 |   escaped <- '"' {~ ([^"] / '""' -> '"')* ~} '"'
108 | ]]
109 | 
110 | local fce = record:streammatch()
111 | ret = { fce("a") }
112 | checkeq(ret, { 1 })
113 | ret = { fce("bc,") }
114 | checkeq(ret, { 1, 'abc' })
115 | ret = { fce("def", true) }
116 | checkeq(ret, { 0, 'def' })
117 | 
118 | local c = re.compile([[
119 |   s <-  ({(!longstring .)+} / longstring)*
120 |   longstring <- '[' {:init: '='* :} '[' close
121 |   close <- ']' =init ']' / . close
122 | ]])
123 | 
124 | local teststring = 'data1[=[insidedata1]=]data2[==[====]==]data3[[]]'
125 | 
126 | local output = { 'data1', 'data2', 'data3' }
127 | 
128 | local fce = c:streammatch()
129 | 
130 | local index = 1
131 | 
132 | for i = 1, #output do
133 |     local status, data
134 |     repeat
135 |         status, data = fce(teststring:sub(index, index), index == #teststring)
136 |         index = index + 1
137 |     until data or status ~= 1
138 |     checkeq(output[i], data)
139 | end
140 | 
141 | local pat = m.C('a') * m.Cg('b', 'backref1') * m.C('c') * m.Cg('d', 'backref2') * m.C('e') * m.Cg('f', 'backref3') *
142 |         m.Cb('backref1') * m.C('g') * m.Cb('backref2') * m.C('h') * m.Cb('backref3') * m.C('i')
143 | local fce = pat:streammatch()
144 | 
145 | ret = { fce("a") }
146 | checkeq(ret, { 1, 'a' })
147 | ret = { fce("b") }
148 | checkeq(ret, { 1 })
149 | ret = { fce("c") }
150 | checkeq(ret, { 1, "c" })
151 | ret = { fce("d") }
152 | checkeq(ret, { 1, })
153 | ret = { fce("e") }
154 | checkeq(ret, { 1, "e" })
155 | ret = { fce("f") }
156 | checkeq(ret, { 1, "b" })
157 | ret = { fce("g") }
158 | checkeq(ret, { 1, "g", "d" })
159 | ret = { fce("h") }
160 | checkeq(ret, { 1, "h", "f" })
161 | ret = { fce("i") }
162 | checkeq(ret, { 0, "i" })
163 | 
164 | local pat = m.C('a') * (m.Cg(1, 'backref') * m.C('x1') * m.Cb('backref') + m.Cg(1, 'backref') * m.C('x2') * m.Cb('backref'))
165 | local fce = pat:streammatch()
166 | ret = { fce("a") }
167 | checkeq(ret, { 1, 'a' })
168 | ret = { fce("x") }
169 | checkeq(ret, { 1 })
170 | ret = { fce("x") }
171 | checkeq(ret, { 1 })
172 | ret = { fce("2") }
173 | checkeq(ret, { 0, 'x2', 'x' })
174 | 
175 | 
176 | local pat = m.C('a') * m.Ct(m.Cg('b', 'index')) * m.C('c')
177 | local fce = pat:streammatch()
178 | 
179 | ret = { fce("a") }
180 | checkeq(ret, { 1, 'a' })
181 | ret = { fce("b") }
182 | checkeq(ret, { 1, { index = 'b' } })
183 | ret = { fce("c") }
184 | checkeq(ret, { 0, 'c' })
185 | 
186 | print('OK')
187 | 
188 | 


--------------------------------------------------------------------------------
/tests/testlr.lua:
--------------------------------------------------------------------------------
  1 | local lpeg = require"lpeglj"
  2 | local re = require"re"
  3 | 
  4 | local m = lpeg
  5 | 
  6 | local function checkeq(x, y, p)
  7 |     if p then print(x, y) end
  8 |     if type(x) ~= "table" then assert(x == y)
  9 |     else
 10 |         for k, v in pairs(x) do checkeq(v, y[k], p) end
 11 |         for k, v in pairs(y) do checkeq(v, x[k], p) end
 12 |     end
 13 | end
 14 | 
 15 | print"Tests for LPegLJ left recursion"
 16 | 
 17 | assert(type(m.version()) == "string")
 18 | print("version " .. m.version())
 19 | 
 20 | m.enableleftrecursion(true)
 21 | 
 22 | --[[
 23 | direct left recursion
 24 | E ← E + n / n
 25 | --]]
 26 | 
 27 | local pat = m.P{
 28 |     "E";
 29 |     E = m.V"E" * '+' * "n" + "n",
 30 | }
 31 | 
 32 | assert(pat:match("n+n+n") == 6)
 33 | 
 34 | --[[
 35 | indirect left recursion
 36 | L ← P.x / x
 37 | P ← P(n) / L
 38 | --]]
 39 | 
 40 | local pat = m.P{
 41 |     "L";
 42 |     L = m.V"P" * ".x" + "x",
 43 |     P = m.V"P" * "(n)" + m.V"L"
 44 | }
 45 | 
 46 | assert(pat:match("x(n)(n).x(n).x") == 15)
 47 | 
 48 | --[[
 49 | left and right recursion with precedence rules
 50 | E ← E1 + E2 / E1 − E2 / E2 ∗ E3 / E2 ÷ E3 / E3 ∗∗ E3 / − E4 / (E1) / n
 51 | --]]
 52 | 
 53 | 
 54 | local pat = m.P{
 55 |     "E",
 56 |     E = m.V("E", 1) * m.S'+-' * m.V("E", 2) +
 57 |             m.V("E", 2) * m.S'*/' * m.V("E", 3) +
 58 |             m.V("E", 3) * '**' * m.V("E", 3) +
 59 |             '-' * m.V("E", 4) +
 60 |             '(' * m.V("E") * ')' +
 61 |             m.R'09' ^ 1,
 62 | }
 63 | 
 64 | assert(pat:match("-1*(6+2/4+3-1)**2") == 18)
 65 | 
 66 | --[[
 67 | left and right recursion with precedence rules
 68 | E ← E1 + E2 / E1 − E2 / E2 ∗ E3 / E2 ÷ E3 / E3 ∗∗ E3 / − E4 / (E1) / n
 69 | create AST tree
 70 | --]]
 71 | 
 72 | 
 73 | local pat = m.P{
 74 |     "E",
 75 |     E = m.Ct(m.V("E", 1) * m.C(m.S'+-') * m.V("E", 2) +
 76 |             m.V("E", 2) * m.C(m.S'*/') * m.V("E", 3) +
 77 |             m.V("E", 3) * m.C('**') * m.V("E", 3) +
 78 |             m.C('-') * m.V("E", 4) +
 79 |             '(' * m.V("E") * ')' +
 80 |             m.C(m.R'09' ^ 1)),
 81 | }
 82 | 
 83 | local ASTtree = pat:match("1+1+1")
 84 | checkeq(ASTtree, { { { "1" }, "+", { "1" } }, "+", { "1" } })
 85 | 
 86 | local ASTtree = pat:match("-1*(6+2/4+3-1)**2")
 87 | checkeq(ASTtree, { { "-", { "1" } }, "*", { { { { { { "6" }, "+", { { "2" }, "/", { "4" } } }, "+", { "3" } }, "-", { "1" } } }, "**", { "2" } } })
 88 | 
 89 | -- using re module with precedence (the same example as above)
 90 | -- call_nonterminal : precedence_level or <call_nonterminal : precedence_level >
 91 | 
 92 | local pat = [[
 93 |      E <- (E:1 {[+-]} E:2 /
 94 |           E:2 {[*/]} E:3 /
 95 |           E:3 {'**'} E:3 /
 96 |           {'-'} E:4 /
 97 |           '(' E ')' /
 98 |           {[0-9]+}) -> {}
 99 | ]]
100 | 
101 | local ASTtree = re.match("-1*(6+2/4+3-1)**2", pat)
102 | checkeq(ASTtree, { { "-", { "1" } }, "*", { { { { { { "6" }, "+", { { "2" }, "/", { "4" } } }, "+", { "3" } }, "-", { "1" } } }, "**", { "2" } } })
103 | 
104 | --[[
105 | simple evaluator
106 | E ← E1 + E2 / E1 − E2 / E2 ∗ E3 / E2 ÷ E3 / E3 ∗∗ E3 / − E4 / (E1) / n
107 | --]]
108 | 
109 | local eval = function(s, i, p1, p2, p3)
110 |     local res
111 |     if p2 == '+' then
112 |         res = p1 + p3
113 |     elseif p2 == '-' then
114 |         res = p1 - p3
115 |     elseif p2 == '*' then
116 |         res = p1 * p3
117 |     elseif p2 == '/' then
118 |         res = p1 / p3
119 |     elseif p1 == '-' then
120 |         res = -p2
121 |     elseif p2 == '**' then
122 |         res = p1 ^ p3
123 |     else
124 |         res = p1
125 |     end
126 |     return true, res
127 | end
128 | 
129 | 
130 | local pat = m.P{
131 |     "E",
132 |     E = m.Cmt(m.V("E", 1) * m.C(m.S'+-') * m.V("E", 2) +
133 |             m.V("E", 2) * m.C(m.S'*/') * m.V("E", 3) +
134 |             m.V("E", 3) * m.C('**') * m.V("E", 3) +
135 |             m.C('-') * m.V("E", 4) +
136 |             '(' * m.V("E") * ')' +
137 |             m.C(m.R'09' ^ 1), eval),
138 | }
139 | 
140 | assert(pat:match("-1*(6+2/4+3-1)**2") == -72.25)
141 | 
142 | 
143 | local pat = m.P{
144 |     "E",
145 |     E = m.V("E", 1) * '+' * m.V("E", 2) / function(c1, c2) return c1 + c2 end +
146 |             m.V("E", 1) * '-' * m.V("E", 2) / function(c1, c2) return c1 - c2 end +
147 |             m.V("E", 2) * '*' * m.V("E", 3) / function(c1, c2) return c1 * c2 end +
148 |             m.V("E", 2) * '/' * m.V("E", 3) / function(c1, c2) return c1 / c2 end +
149 |             m.V("E", 3) * '**' * m.V("E", 3) / function(c1, c2) return c1 ^ c2 end +
150 |             '-' * m.V("E", 4) / function(c1) return -c1 end +
151 |             '(' * m.V("E") * ')' +
152 |             m.C(m.R'09' ^ 1),
153 | }
154 | 
155 | assert(pat:match("-1*(6+2/4+3-1)**2") == -72.25)
156 | 
157 | local def = {
158 |     plus = function(p1, p2) return p1 + p2 end,
159 |     minus = function(p1, p2) return p1 - p2 end,
160 |     mult = function(p1, p2) return p1 * p2 end,
161 |     div = function(p1, p2) return p1 / p2 end,
162 |     pow = function(p1, p2) return p1 ^ p2 end,
163 |     uminus = function(p1) return -p1 end,
164 |     errfce = function(o, i)
165 |         local errstr = o .. '\n' .. (' '):rep(i) .. '^' .. '\n'
166 |         io.write(errstr)
167 |         return false
168 |     end,
169 | }
170 | 
171 | local pat = [[
172 |      P <-  E s (!. / error)
173 |      s <- %s*
174 |      error <- '' => errfce
175 |      E <- (E:1 s'+' E:2) -> plus /
176 |           (E:1 s'-' E:2) -> minus /
177 |           (E:2 s'*' E:3) -> mult /
178 |           (E:2 s'/' E:3) -> div /
179 |           (E:3 s'**' E:3)-> pow /
180 |           (s'-' E:4) -> uminus /
181 |           s'(' E s')' /
182 |           s{[0-9]+} /
183 |           error
184 | ]]
185 | 
186 | local pat = re.compile(pat, def)
187 | assert(re.match("-1 * (6 + 2 / 4 + 3 - 1)**2", pat) == -72.25)
188 | 
189 | local pat = [[
190 |      A <-  B "a"
191 |      B <-  C "b"
192 |      C <-  B / A / "c"
193 | ]]
194 | 
195 | local pat = re.compile(pat)
196 | assert(re.match("cbbabbba", pat) == 9)
197 | 
198 | local pat = [[
199 |      S <- A / B
200 |      A <- A "a" / B / "a"
201 |      B <- B "b" / A / "b"
202 | ]]
203 | 
204 | local pat = re.compile(pat)
205 | assert(re.match("baabbaaa", pat) == 9)
206 | 
207 | print"OK"
208 | 


--------------------------------------------------------------------------------