├── LICENSE ├── README.md ├── init.lua ├── pegex-0.2-1.rockspec ├── pegex.lua ├── regex_find.lua └── test ├── Makefile ├── test.lua ├── test_bugs.lua ├── test_numgroups.lua ├── test_regex.lua ├── test_regex2.lua ├── test_regex_backref.lua └── test_regex_cap.lua /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Chris Emerson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ta-regex/Pegex 2 | ============== 3 | 4 | Pegex is a regular expression (regexp) implementation built on top of LPeg. 5 | 6 | The original motivation was to add regular expression search support for 7 | the Textadept editor; however the underlying engine is generic. 
8 | 9 | This module replaces the default text search with one which uses regular 10 | expressions. 11 | 12 | Currently full regular expressions are supported (not including, e.g., Perl 13 | extensions, though some are planned); this is more than the subset supported 14 | natively in Textadept (which, e.g., doesn't include "|"). 15 | 16 | | Syntax | Matches | 17 | |--------|---------| 18 | | . | Any character except newline | 19 | | [abA-Z]| The characters a,b, or any capital letter | 20 | | \\< | Zero-length, matches just before the start of a word | 21 | | \\> | Zero-length, matches just after the end of a word | 22 | | foo|bar | Match foo or bar | 23 | | (pat) | Match the same as pat (subgroup) | 24 | | (?:pat) | Match the same as pat (subgroup), non-capturing | 25 | | x* | Match zero or more x | 26 | | x+ | Match one or more x | 27 | | x? | Match zero or one x | 28 | | \\x | Where x is one of: ()\\?*+|.^$ : match the character x | 29 | | \\w | Any "word" character [a-zA-Z0-9_] | 30 | | \\W | Any non-"word" character [^a-zA-Z0-9_] | 31 | | \\d | Any digit character [0-9] | 32 | | \\D | Any non-digit character [^0-9] | 33 | | \\s | Any whitespace character [ \\t\\n\\v\\r] | 34 | | \\S | Any non-whitespace character [^ \\t\\n\\v\\r] | 35 | | \\1 ... \\9 | Back reference to Nth (subgroup) | 36 | 37 | Installation 38 | ------------ 39 | To install Pegex standalone, use "luarocks install pegex". 40 | Example usage: 41 | 42 | ```lua 43 | local pegex = require('pegex') 44 | local pat = pegex.compile('(?:foo|bar)+') 45 | local result = pat:match("asdfoo") -- returns { _start=4, _end=6} 46 | result = pat:match("asdf") -- returns nil (not found) 47 | ``` 48 | See the tests for examples using captures and backreferences. 
49 | 50 | To use with Textadept to replace the default search method: 51 | 52 | Add the ta-regex directory to ~/.textadept/modules/ 53 | 54 | Add the following line to ~/.textadept/init.lua 55 | 56 | ```lua 57 | local ta_regex = require 'ta-regex' 58 | ta_regex.install() 59 | ``` 60 | 61 | Internal details 62 | ---------------- 63 | The module adds a handler for events.FIND to intercept searches. Regular 64 | expressions are converted to equivalent LPEG patterns, which are then used 65 | for searching the text. 66 | 67 | The regex-to-LPEG conversion can be used independently. -------------------------------------------------------------------------------- /init.lua: -------------------------------------------------------------------------------- 1 | -- Copyright (C) 2014 Chris Emerson 2 | -- See LICENSE for details (MIT license). 3 | 4 | local M = require 'ta-regex.regex_find' 5 | 6 | return M -------------------------------------------------------------------------------- /pegex-0.2-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "pegex" 2 | version = "0.2-1" 3 | source = { 4 | url = "git+https://github.com/jugglerchris/ta-regex", -- GitHub no longer serves the unauthenticated git:// protocol 5 | tag = "v0.2" 6 | } 7 | description = { 8 | summary = "Regular expression/regexp implementation using LPeg.", 9 | detailed = [[An implementation of regular expressions using LPeg, based on the paper at: 10 | http://www.inf.puc-rio.br/~roberto/docs/ry10-01.pdf 11 | ]] , 12 | 13 | homepage = "https://github.com/jugglerchris/ta-regex", 14 | license = "MIT" 15 | } 16 | dependencies = { 17 | "lua >= 5.1, < 5.4", 18 | "lpeg >= 0.12", 19 | } 20 | build = { 21 | type = "builtin", 22 | modules = { 23 | pegex = "pegex.lua", 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /pegex.lua: -------------------------------------------------------------------------------- 1 | -- Copyright (C) 2014 Chris Emerson 2 | -- See LICENSE for details (MIT 
license). 3 | 4 | -- Support for regular expressions (parsed and implemented with LPeg). 5 | local M = {} 6 | 7 | local lpeg = require('lpeg') 8 | local P = lpeg.P 9 | local R = lpeg.R 10 | local S = lpeg.S 11 | local C = lpeg.C 12 | local V = lpeg.V 13 | local B = lpeg.B 14 | local Carg = lpeg.Carg 15 | local Cb = lpeg.Cb 16 | local Cc = lpeg.Cc 17 | local Cf = lpeg.Cf 18 | local Cp = lpeg.Cp 19 | local Cg = lpeg.Cg 20 | local Ct = lpeg.Ct 21 | local Cmt = lpeg.Cmt 22 | 23 | -- We use the algorithm to convert from a regular expression to a Peg 24 | -- expression from: 25 | -- http://www.inf.puc-rio.br/~roberto/docs/ry10-01.pdf 26 | -- Shift a captured position down by one (turns an exclusive end into an inclusive one). 27 | local function sub1(x) 28 | return x - 1 29 | end 30 | 31 | -- Parts of a regular expression, returning an LPEG pattern which matches it. 32 | local _start = Cg(Cp(), "_start") 33 | local _end = Cg(Cp()/sub1, "_end") 34 | local mt = { -- metatable for compiled patterns; supplies pat:match(subject, index) 35 | __index = { 36 | match = function(t, s, index) 37 | local result = t._pat:match(s, index) 38 | -- nil means no match; otherwise result holds _start/_end plus raw "sN"/"eN" group positions. 39 | if result == nil then return result end 40 | -- Post-process to put the matches into a nicer form 41 | local groups = nil 42 | for k,v in pairs(result) do 43 | if k:sub(1,1) == "s" then 44 | local grpname= k:sub(2) 45 | local endpos = result["e"..grpname] 46 | if v and endpos then 47 | if grpname:match("(%d+)") then 48 | grpname = tonumber(grpname) 49 | end 50 | groups = groups or {} 51 | groups[grpname] = {v,endpos} 52 | result[k] = nil 53 | result["e"..grpname] = nil 54 | end 55 | end 56 | end 57 | result.groups = groups 58 | return result 59 | end, 60 | }, 61 | } 62 | 63 | -- Make special character sets 64 | local function make_b_s() 65 | return { { " \t\v\n\r", [0]="set" }, [0]="charset" } 66 | end 67 | local function make_b_S() 68 | return { { " \t\v\n\r", [0]="set" }, [0]="charset", 69 | negate=true} 70 | end 71 | local function make_b_w() 72 | return { [0]="charset", 73 | { [0]="range", "a", "z" }, 74 | { [0]="range", "A", "Z" }, 75 | { [0]="range", "0", "9" }, 76 | { [0]="char", "_" }, 77 | 
} 78 | end 79 | local function make_b_W() 80 | return { [0]="charset", 81 | { [0]="range", "a", "z" }, 82 | { [0]="range", "A", "Z" }, 83 | { [0]="range", "0", "9" }, 84 | { [0]="char", "_" }, 85 | negate=true 86 | } 87 | end 88 | local function make_b_d() 89 | return { [0]="charset", 90 | { [0]="range", "0", "9" }, 91 | } 92 | end 93 | local function make_b_D() 94 | return { [0]="charset", 95 | { [0]="range", "0", "9" }, 96 | negate=true, 97 | } 98 | end 99 | local function make_charset(c) 100 | return function() return { [0]="charset", { [0]="char", c } } end 101 | end 102 | local function make_char(c) 103 | return function() return { [0]="char", c } end 104 | end 105 | -- Characters that need a backslash to be matched literally outside a character set. 106 | local special = S"()\\?*+|.^$" 107 | local any = P"." * Cc({[0] = "."}) 108 | 109 | -- Perl-style character classes 110 | local b_s = P"\\s" / make_b_s 111 | local b_S = P"\\S" / make_b_S 112 | local b_w = P"\\w" / make_b_w 113 | local b_W = P"\\W" / make_b_W 114 | local b_d = P"\\d" / make_b_d 115 | local b_D = P"\\D" / make_b_D 116 | local b_t = P"\\t" / make_charset('\t') 117 | local b_n = P"\\n" / make_charset('\n') 118 | local b_r = P"\\r" / make_charset('\r') 119 | local b_f = P"\\f" / make_charset('\f') 120 | local b_e = P"\\e" / make_charset('\027') -- decimal escapes: Lua 5.1 does not understand "\x.." and would yield the text "x1b" 121 | local b_a = P"\\a" / make_charset('\007') 122 | 123 | local backcharset = b_s + b_S + b_w + b_W + b_d + b_D + 124 | b_t + b_n + b_r + b_f + b_e + b_a 125 | local charset_special = S"]-" 126 | local charset_escapes = (b_t + b_n + b_r + b_f + b_e + b_a) / 127 | function(c) return c[1] end 128 | local charset_char = C(P(1) - charset_special) / 129 | function(c) return { [0] = "char", c } end 130 | local range = (C(P(1) - charset_special) * P"-" * C(P(1) - charset_special)) / 131 | function(a,b) return { [0]="range", a, b } end 132 | local charset = (P"[" * 133 | Ct((Cg(P"^"*Cc(true), "negate") + P(0)) 134 | * (range + charset_escapes + charset_char)^0) * 135 | P"]") / 136 | function(x) x[0] = "charset" return x end 137 | local char = 
C(P(1) - special) / function(c) return { [0] = "char", c } end 138 | local escapechar = (P"\\" * C(special)) / function(c) return { [0] = "char", c } end 139 | local backref = (P"\\" * C(R"19")) / function(c) return { tonumber(c), [0] = "backref" } end 140 | -- Characters treated as word characters by the word-boundary atoms. 141 | local wordchar = R("AZ", "az", "09") + S("_") 142 | local nonwordchar = 1 - wordchar 143 | 144 | -- word boundaries 145 | local word_start = P"\\<" * Cc({[0] = "\\<"}) 146 | local word_end = P"\\>" * Cc({[0] = "\\>"}) 147 | 148 | -- {n} etc. Returns two captures - (min, max); max can be nil (no max) 149 | local count_exact = (P"{" * C(R"09" ^ 1) * P"}") / function(c) return tonumber(c), tonumber(c) end 150 | local count_minmax = (P"{" * C(R"09" ^ 1) * P"," * C(R"09" ^ 1) * P"}") / function(min,max) return tonumber(min), tonumber(max) end 151 | local count_min = (P"{" * C(R"09" ^ 1) * P",}") / function(c) return tonumber(c), nil end 152 | local brace_count = count_exact + count_minmax + count_min 153 | 154 | -- Grouping 155 | local newgrp = (Cb("groups") * Cp()) / 156 | function(groups, pos) 157 | local grp = #groups+1 158 | groups[grp] = {pos} 159 | groups.open[#groups.open+1] = grp 160 | end 161 | 162 | -- endgrp leaves the group number or name as a capture 163 | local endgrp = (Cb("groups") * Cp()) / 164 | function(groups, pos) 165 | local grp = groups.open[#groups.open] 166 | groups.open[#groups.open] = nil 167 | groups[grp][2] = pos 168 | return grp 169 | end 170 | 171 | local bra = P"(" * newgrp 172 | local ket = P")" * endgrp 173 | 174 | local anonbra = P"(?:" 175 | local anonket = P")" 176 | 177 | local pattern = P{ 178 | "pattern", 179 | 180 | -- A complete pattern, starting from an empty pattern. 
181 | pattern = Cg(Carg(1),"groups") * Ct((P"^"*Cg(Cc(1),"anchorstart") + P(0)) * V"subpat" * (P"$"*(-P(1))*Cg(Cc(1),"anchorend") + (-P(1)))) / 182 | function(t) t[0] = "pattern" ; return t end, 183 | 184 | -- A set of alternate branches 185 | subpat = (V"branch" * (P"|" * V"branch") ^ 0) / 186 | function(...) return { [0] = "alt", ... } end, 187 | 188 | branch = V"concat", 189 | 190 | -- A set of concatenated pieces 191 | -- Pass a dummy capture to avoid the special case of no captures confusing 192 | -- the function. 193 | concat = Cc(nil) * (V"piece" ^ 0) / 194 | function(_, ...) return { [0] = "concat", ... } end, 195 | 196 | piece = V"atom_multi", 197 | 198 | atom_multi = V"atom_plus" + V"atom_star" + V"atom_query" + V"atom_count" + V"atom", 199 | 200 | atom_plus = (V"atom" * P"+") / 201 | function(atom) return { [0] = "+", atom } end, 202 | atom_star = (V"atom" * P"*") / 203 | function(atom) return { [0] = "*", atom } end, 204 | atom_query = (V"atom" * P"?") / 205 | function(atom) return { [0] = "?", atom } end, 206 | atom_count = (V"atom" * brace_count) / 207 | function(atom, min, max) return { [0] = "{}", min=min, max=max, atom } end, 208 | 209 | anongroup = (anonbra * V"subpat" * anonket), 210 | group = (bra * V"subpat" * ket) / 211 | function(subpat, grpname) return { [0] = "group", subpat, grpname } end, 212 | atom = any + word_start + word_end + escapechar + charset + V"anongroup" + V"group" + char + backref + backcharset, 213 | } 214 | -- Right fold over the array part of t; when init is nil the last element seeds the result. 215 | local function foldr(f, t, init) 216 | local res = init 217 | local start = #t 218 | if res == nil then 219 | res = t[start] 220 | start = start - 1 221 | end 222 | 223 | for i=start,1,-1 do 224 | res = f(t[i], res) 225 | end 226 | return res 227 | end 228 | -- Apply f to every element of the array part of t and return the results as a new array. 229 | local function map(f, t) 230 | local result = {} 231 | for i=1,#t do 232 | result[i] = f(t[i]) 233 | end 234 | return result 235 | end 236 | -- "+" as a function, so that foldr can combine a list of LPeg patterns into one choice. 237 | local function add(a,b) 238 | return a+b 239 | end 240 | 241 | -- Convert a charset fragment to a PEG 242 | 
local function charset_to_peg(charfrag)
  -- charfrag is one member of a parsed character set; its [0] field names
  -- the kind: "char" (one literal), "range" (two endpoints) or "set" (a
  -- string of member characters).
  local t = charfrag[0]
  if t == "char" then
    assert(#charfrag == 1)
    return P(charfrag[1])
  elseif t == "range" then
    assert(#charfrag == 2)
    return R(charfrag[1] .. charfrag[2])
  elseif t == "set" then
    return S(charfrag[1])
  else
    error("Got charset bit: "..tostring(t).."/"..tostring(t and t[0]))
  end
end

-- Reserve a fresh rule name in the grammar shared by the whole pattern.
-- Every repetition becomes a rule of this single grammar: a grammar closed
-- with P{} resolves all V() references inside it, including those that sit
-- in a continuation and were meant for an enclosing repetition, so separate
-- per-repetition grammars cannot be nested safely.
-- NOTE(review): LPeg limits the number of rules in one grammar; extremely
-- large expressions could reach that limit - confirm for the targeted version.
local function new_rule(patternProps)
  patternProps.rules = patternProps.rules or {}
  local n = (patternProps.numRules or 0) + 1
  patternProps.numRules = n
  return "rep" .. n
end

-- Translate the parsed expression node `retab` into an LPeg pattern.
--   retab         parse-tree node; retab[0] names the node type
--   k             continuation: the pattern that has to match after this node
--   patternProps  per-compilation state; numGroups is updated to the number
--                 of capture groups, and the rules/numRules/seenGroups fields
--                 are used internally
-- Returns a pattern matching the node followed by k.  For the root "pattern"
-- node the result is the finished, closed grammar.
local function re_to_peg(retab, k, patternProps)
  local t = retab[0]
  if t == "pattern" then
    assert(#retab == 1)
    -- If the pattern is anchored at the end, require end-of-subject as part
    -- of the continuation instead of appending it afterwards: a PEG choice
    -- is committed once one alternative has succeeded, so a check added
    -- after the fact cannot make an earlier alternative back off (consider
    -- "a|ab$" against "ab").  The check consumes nothing, so the end marker
    -- below still records the right position.
    if retab.anchorend then
      k = (-P(1)) * k
    end
    local pat = re_to_peg(retab[1], k, patternProps)
    -- Add match start/end markers
    pat = _start * pat * _end
    if not retab.anchorstart then
      -- Match the pattern, or a character and try again.
      pat = pat + 1*V(1)
    end
    -- Close the one grammar holding the start rule and all repetition rules.
    local grammar = patternProps.rules or {}
    grammar[1] = pat
    return P(grammar)
  elseif t == "group" then
    assert(#retab == 2)
    local grpname = tostring(retab[2])
    -- "{n,m}" translates its operand several times; count each group once.
    local seen = patternProps.seenGroups or {}
    patternProps.seenGroups = seen
    if not seen[grpname] then
      seen[grpname] = true
      patternProps.numGroups = patternProps.numGroups + 1
    end
    -- The group end is recorded as part of the continuation, the start
    -- right before the group body.
    local newk = Cg(Cp()/sub1, "e"..grpname) * k
    local pat = re_to_peg(retab[1], newk, patternProps)
    return Cg(Cp(), "s"..grpname) * pat
  elseif t == "alt" then
    if #retab == 1 then
      return re_to_peg(retab[1], k, patternProps)
    else
      local parts = map(function(x)
                          return re_to_peg(x, k, patternProps)
                        end, retab)
      return foldr(add, parts)
    end
  elseif t == "concat" then
    -- Build from the last piece backwards: each piece gets everything that
    -- follows it as its continuation.
    return foldr(function(retab_f, k_f)
                   return re_to_peg(retab_f, k_f, patternProps)
                 end, retab, k)
  elseif t == "char" then
    assert(#retab == 1)
    return P(retab[1]) * k
  elseif t == "charset" then
    -- Seeding the fold with P(false) leaves non-empty sets unchanged and
    -- makes an empty set "[]" match nothing ("[^]" any character) instead
    -- of failing with arithmetic on nil.
    local charset_pat = foldr(add, map(charset_to_peg, retab), P(false))
    if retab.negate then
      charset_pat = 1 - charset_pat
    end
    return charset_pat * k
  elseif t == "*" then
    -- rule <- X rule / k
    local rule = new_rule(patternProps)
    patternProps.rules[rule] = re_to_peg(retab[1], V(rule), patternProps) + k
    return V(rule)
  elseif t == "+" then
    -- rule <- X (rule / k), i.e. one X followed by X*
    local rule = new_rule(patternProps)
    patternProps.rules[rule] = re_to_peg(retab[1], V(rule) + k, patternProps)
    return V(rule)
  elseif t == "." then
    assert(#retab == 0)
    return (P(1) - P"\n") * k
  elseif t == "?" then
    assert(#retab == 1)
    return re_to_peg(retab[1], k, patternProps) + k
  elseif t == "{}" then
    assert(#retab == 1)
    -- Rewrite this in terms of ? and *.
    -- X{3,} => XXXX*
    -- X{3,5} => XXXX?X?
    local subpat = retab[1]
    local min = retab.min
    local max = retab.max
    local rewritten = { [0] = "concat" }
    for i=1,min do
      rewritten[#rewritten+1] = subpat
    end
    if max == nil then
      rewritten[#rewritten+1] = { [0] = "*", subpat }
    else
      local optional = { [0] = "?", subpat }
      for i=min+1,max do
        rewritten[#rewritten+1] = optional
      end
    end
    return re_to_peg(rewritten, k, patternProps)
  elseif t == "\\<" then
    assert(#retab == 0)
    return -B(wordchar) * #wordchar * k
  elseif t == "\\>" then
    assert(#retab == 0)
    return B(wordchar) * (-#wordchar) * k
  elseif t == "backref" then
    local grpname = retab[1]
    -- Compare the text at the current position with the text of the
    -- referenced group, then carry on with the continuation (without "* k"
    -- everything following the back reference would be ignored).
    return Cmt(P(0) * Cb("s"..grpname) * Cb("e"..grpname),
               function(subject, pos, s, e)
                 local backval = subject:sub(s, e)
                 local here = subject:sub(pos, pos+e-s)
                 if backval == here then
                   return pos+e-s+1
                 else
                   return false
                 end
               end) * k
  else
    error("Not implemented op: " ..tostring(t) .. "/" .. tostring(retab))
  end
end

-- Parse a regular expression string into its tree form.
-- Returns nil when the string cannot be parsed.
function M.parse(re)
  return pattern:match(re, 1, {open={}})
end
function M.compile(re)
  -- The RE->Peg construction works backwards: every piece is built from the
  -- continuation that follows it, so re_to_peg folds the parsed pieces from
  -- right to left.
367 | local retab = M.parse(re) 368 | if retab == nil then 369 | error("Failed to parse regular expression: {"..re.."}", 2) 370 | end 371 | local patternProps = { numGroups = 0 } 372 | local _pat = re_to_peg(retab, P(0), patternProps) 373 | return setmetatable({ 374 | _pat = Ct(_pat), 375 | numgroups = patternProps.numGroups 376 | }, mt) 377 | end 378 | 379 | -- Increase match complexity 380 | lpeg.setmaxstack(1000) 381 | 382 | return M 383 | -------------------------------------------------------------------------------- /regex_find.lua: -------------------------------------------------------------------------------- 1 | -- Copyright (C) 2014 Chris Emerson 2 | -- See LICENSE for details (MIT license). 3 | local M = {} 4 | 5 | local ta_regex = require 'ta-regex.pegex' 6 | 7 | -- Replace textadept's events.FIND handler with one implementing better regex. 8 | 9 | function M.install() 10 | events.connect(events.FIND, M.find, 1) 11 | end 12 | 13 | -- Find expression forwards (or backwards when forward is false) from the current point, wrapping around the buffer. 14 | function M.find(regex, forward) 15 | local pat = ta_regex.compile(regex) 16 | 17 | -- Search a subset of the buffer, and adjust the match to set the 18 | -- start/end pointers correctly. 19 | local function search(startpos, endpos) 20 | local m = pat:match(buffer:text_range(startpos, endpos)) 21 | if m then 22 | -- Adjust result to take account of startpos 23 | m._start = m._start + startpos - 1 24 | m._end = m._end + startpos 25 | end 26 | return m 27 | end 28 | 29 | -- As search(), but search backwards. 30 | -- This isn't as efficient, as it searches forward and waits for the 31 | -- last match. 32 | local function search_rev(startpos, endpos) 33 | local res = nil 34 | while true do 35 | local m = search(startpos, endpos) 36 | if m then 37 | -- a later match than we'd previously had 38 | res = m 39 | -- Continue after this match (non-overlapping). An empty match ends where it started, so always advance by at least one position or the loop never terminates. 40 | startpos = math.max(m._end, startpos + 1) 41 | if startpos > endpos then break end 42 | else 43 | -- no other matches - return the last we got. 
44 | break 45 | end 46 | end 47 | return res 48 | end 49 | 50 | local m = nil 51 | if forward then 52 | local startpos = buffer.current_pos + 1 53 | local endpos = buffer.length 54 | 55 | -- If we're at the end of the buffer, then start from 56 | -- the beginning. 57 | if startpos >= endpos then startpos = 0 end 58 | 59 | m = search(startpos, endpos) or search(0, endpos) 60 | else 61 | local startpos = 0 62 | local endpos = buffer.current_pos 63 | 64 | m = search_rev(startpos, endpos) or search_rev(0, buffer.length) 65 | end 66 | 67 | if m then 68 | local s, e = m._start, m._end 69 | buffer:set_sel(e, s) 70 | else 71 | ui.statusbar_text = "Not found" 72 | end 73 | 74 | return false 75 | end 76 | 77 | return M 78 | -------------------------------------------------------------------------------- /test/Makefile: -------------------------------------------------------------------------------- 1 | # Simple test framework 2 | 3 | TEST_FILES := $(wildcard test_*.lua) 4 | TESTS := $(patsubst %.lua,%,$(TEST_FILES)) 5 | $(warning TESTS=$(TESTS)) 6 | 7 | LUA := lua 8 | 9 | %: %.lua 10 | $(LUA) $< 11 | 12 | test: $(TESTS) -------------------------------------------------------------------------------- /test/test.lua: -------------------------------------------------------------------------------- 1 | -- Test utilities 2 | local M = {} 3 | 4 | -- Get access to the regex modules 5 | package.path = "../?.lua;" .. package.path 6 | 7 | local eq 8 | local tableEq 9 | 10 | function eq(a, b) 11 | -- Easy case: builtin equal works for most cases. 12 | if a == b then return true end 13 | 14 | if type(a) ~= 'table' or type(b) ~= 'table' then 15 | -- If not both tables, then not equal. 16 | return false 17 | end 18 | return tableEq(a, b) 19 | end 20 | 21 | -- Compare two tables, treating them as the same if their key/value pairs 22 | -- are equal. 23 | function tableEq(a, b) 24 | -- First, check that every key in a matches one in b. 
25 | for k,v in pairs(a) do 26 | if not eq(v, b[k]) then return false end -- eq recurses into nested tables 27 | end 28 | 29 | -- Second, check that every key in b exists in a. 30 | -- We don't need to compare - if the key is in a then we've already 31 | -- checked. 32 | for k,_ in pairs(b) do 33 | if a[k] == nil then return false end 34 | end 35 | 36 | -- They must be equal 37 | return true 38 | end 39 | 40 | -- Pretty-print tables 41 | function M.tostring(a) 42 | if type(a) == "string" then 43 | return '"' .. a .. '"' 44 | elseif type(a) ~= 'table' then return tostring(a) end 45 | local maxn = 0 -- last index emitted by the ipairs pass below 46 | local sbits = {'{'} 47 | for i,v in ipairs(a) do -- array part first, in index order 48 | table.insert(sbits, M.tostring(v) .. ", ") 49 | maxn = i 50 | end 51 | for k,v in pairs(a) do -- then every remaining key, in whatever order pairs yields 52 | -- Do the non-contiguous-integer keys 53 | if type(k) == 'number' and k == math.ceil(k) and k <= maxn and k >= 1 then 54 | -- Ignore an integer key we've already seen 55 | else 56 | table.insert(sbits, '['..M.tostring(k)..'] = '..M.tostring(v)..', ') 57 | end 58 | end 59 | table.insert(sbits, '}') 60 | return table.concat(sbits) 61 | end 62 | 63 | --- Assert that a and b are equal. Tables are equal if their keys 64 | -- and values are equal. Calls error() with level to report an error. 65 | local function assertEqLevel(a, b, level) -- level is handed unchanged to error() as its stack level 66 | if not eq(a,b) then 67 | error("Failed assertion: [["..M.tostring(a).."]] != [["..M.tostring(b).."]]\n", level) 68 | end 69 | end 70 | 71 | --- Assert that a and b are equal. Tables are equal if their keys 72 | -- and values are equal. Returns true or calls error(). 
73 | function M.assertEq(a, b) 74 | assertEqLevel(a, b, 3) -- plain call, not a tail call: level 3 is then assertEq's caller on every Lua version 75 | return true 76 | end 77 | function M.log(x) 78 | print(M.tostring(x)) 79 | end 80 | 81 | return M -------------------------------------------------------------------------------- /test/test_bugs.lua: -------------------------------------------------------------------------------- 1 | local test = require'test' 2 | local assertEq = test.assertEq 3 | local log = test.log 4 | local pegex = require('pegex') 5 | local compile = pegex.compile 6 | 7 | local pat = compile('|') 8 | assertEq(pat:match("foobar"), {_start=1,_end=0}) 9 | 10 | pat = compile('()|') 11 | assertEq(pat:match("foobar"), {_start=1,_end=0, groups={{1,0}},}) 12 | 13 | pat = compile("((a)|(ab))((c)|(bc))") 14 | assertEq(pat:match("ac"), {_start=1,_end=2, groups={ 15 | {1,1}, 16 | {1,1}, 17 | nil, 18 | {2,2}, 19 | {2,2}, 20 | nil}}) 21 | assertEq(pat:match("abc"), {_start=1,_end=3, groups={ 22 | {1,1}, 23 | {1,1}, 24 | nil, 25 | {2,3}, 26 | nil, 27 | {2,3}}}) 28 | assertEq(pat:match("abbc"), {_start=1,_end=4, groups={ 29 | {1,2}, 30 | nil, 31 | {1,2}, 32 | {3,4}, 33 | nil, 34 | {3,4}}}) -------------------------------------------------------------------------------- /test/test_numgroups.lua: -------------------------------------------------------------------------------- 1 | local test = require'test' 2 | local assertEq = test.assertEq 3 | local function log(x) test.log(tostring(x) .. 
"\n") end 4 | local pegex = require('pegex') 5 | local compile = pegex.compile 6 | 7 | local pat = compile('foo') 8 | assertEq(pat.numgroups, 0) 9 | 10 | pat = compile('f(o)o') 11 | assertEq(pat.numgroups, 1) 12 | 13 | pat = compile('f((o))o') 14 | assertEq(pat.numgroups, 2) 15 | 16 | pat = compile('f(o)((a)o)') 17 | assertEq(pat.numgroups, 3) -------------------------------------------------------------------------------- /test/test_regex.lua: -------------------------------------------------------------------------------- 1 | local test = require'test' 2 | local assertEq = test.assertEq 3 | local function log(x) test.log(tostring(x) .. "\n") end 4 | local pegex = require('pegex') 5 | local compile = pegex.compile 6 | 7 | local pat = compile('fo+[ab]ar') 8 | 9 | assertEq(pat:match("foobar"), {_start=1,_end=6}) 10 | assertEq(pat:match("foooobar"), {_start=1,_end=8}) 11 | assertEq(pat:match("foblahfoobar"), {_start=7, _end=12}) 12 | assertEq(pat:match("foblahfooaar"), {_start=7, _end=12}) 13 | assertEq(pat:match("fbar"), nil) 14 | 15 | local pat = compile('(?:foo|bar)+') 16 | 17 | assertEq(pat:match("asdfoo"), { _start=4, _end=6 }) 18 | assertEq(pat:match("asdfobar"), { _start=6, _end=8 }) 19 | assertEq(pat:match("asdfoofoobarjkl;"), { _start=4, _end=12 }) 20 | assertEq(pat:match("asdfabulous"), nil) 21 | 22 | local pat = compile('^.foo') 23 | 24 | assertEq(pat:match('afoo'), { _start=1, _end=4 }) 25 | assertEq(pat:match('jfoo'), { _start=1, _end=4 }) 26 | assertEq(pat:match('jjfoo'), nil) 27 | assertEq(pat:match('foo'), nil) 28 | assertEq(pat:match('foo then foo'), nil) 29 | 30 | local pat = compile('a.?b') 31 | 32 | assertEq(pat:match('abooo'), { _start=1,_end=2 }) 33 | assertEq(pat:match('axbooo'), { _start=1,_end=3 }) 34 | assertEq(pat:match('axxbooo'), nil) 35 | 36 | local pat = compile('a.*b') 37 | 38 | assertEq(pat:match('axbcdef'), { _start=1,_end=3 }) 39 | assertEq(pat:match('axxbcdef'), { _start=1,_end=4 }) 40 | assertEq(pat:match('abcdef'), { _start=1,_end=2 }) 
41 | assertEq(pat:match('abcdebf'), { _start=1,_end=6 }) 42 | assertEq(pat:match('ababab'), { _start=1, _end=6 }) 43 | -- Word boundaries: foo must stand alone as a word. 44 | local pat = compile('\\<foo\\>') 45 | 46 | assertEq(pat:match('foo'), { _start=1, _end=3 }) 47 | assertEq(pat:match('afoo'), nil) 48 | assertEq(pat:match('foob'), nil) 49 | assertEq(pat:match('a foo b'), { _start=3, _end=5 }) 50 | assertEq(pat:match('a afoo b'), nil) 51 | assertEq(pat:match('a foob b'), nil) 52 | 53 | local pat = compile('10') 54 | 55 | assertEq(pat:match('10'), { _start=1, _end=2 }) 56 | assertEq(pat:match('10\n'), { _start=1, _end=2 }) 57 | 58 | local pat = compile('ab[^a-z,]de') 59 | 60 | assertEq(pat:match('abcde'), nil) 61 | assertEq(pat:match('abade'), nil) 62 | assertEq(pat:match('abzde'), nil) 63 | assertEq(pat:match('abCde'), { _start=1, _end=5 }) 64 | assertEq(pat:match('ab.de'), { _start=1, _end=5 }) 65 | assertEq(pat:match('ab,de'), nil) 66 | 67 | -- Check that it's case sensitive. 68 | local pat = compile('abc') 69 | 70 | assertEq(pat:match('abcde'), { _start=1, _end=3}) 71 | assertEq(pat:match('abade'), nil) 72 | assertEq(pat:match('abCde'), nil) 73 | 74 | -- Check counts 75 | local pat = compile('ab{3}c') 76 | assertEq(pat:match('abbbc'), { _start=1, _end=5}) 77 | assertEq(pat:match('abbc'), nil) 78 | assertEq(pat:match('abbXc'), nil) 79 | assertEq(pat:match('abbbbc'), nil) 80 | 81 | local pat = compile('ab{3,}c') 82 | assertEq(pat:match('abbc'), nil) 83 | assertEq(pat:match('abbbc'), { _start=1, _end=5}) 84 | assertEq(pat:match('abbbbc'), { _start=1, _end=6}) 85 | assertEq(pat:match('abbbbbc'), { _start=1, _end=7}) 86 | assertEq(pat:match('abbbbbbc'), { _start=1, _end=8}) 87 | 88 | local pat = compile('ab{3,5}c') 89 | assertEq(pat:match('abbc'), nil) 90 | assertEq(pat:match('abbbc'), { _start=1, _end=5}) 91 | assertEq(pat:match('abbbbc'), { _start=1, _end=6}) 92 | assertEq(pat:match('abbbbbc'), { _start=1, _end=7}) 93 | assertEq(pat:match('abbbbbbc'), nil) 94 | 
-------------------------------------------------------------------------------- /test/test_regex2.lua: -------------------------------------------------------------------------------- 1 | local test = require'test' 2 | local assertEq = test.assertEq 3 | local function log(x) test.log(tostring(x) .. "\n") end 4 | local pegex = require('pegex') 5 | local compile = pegex.compile 6 | -- Expected results below use 1-based, inclusive _start/_end positions. 7 | local pat 8 | -- Check \s, \S, \w, \W, \d, \D 9 | pat = compile('x\\s') 10 | 11 | assertEq(pat:match("xxx j"), {_start=3,_end=4}) 12 | assertEq(pat:match("xxx\tj"), {_start=3,_end=4}) 13 | 14 | pat = compile('\\w\\s') 15 | assertEq(pat:match("x j"), {_start=1,_end=2}) 16 | assertEq(pat:match("Z_ j"), {_start=2, _end=3}) 17 | assertEq(pat:match("Z; j"), nil) 18 | 19 | pat = compile('\\W\\w') 20 | assertEq(pat:match(";2 177_x j"), {_start=1,_end=2}) 21 | 22 | pat = compile('\\S\\s\\S') 23 | assertEq(pat:match(";2 177_x j"), {_start=2,_end=4}) 24 | 25 | pat = compile('\\d+') 26 | assertEq(pat:match("askdfj1317 jlj"), {_start=7, _end=10}) 27 | 28 | pat = compile('\\D+') 29 | assertEq(pat:match("1230 _abc;_ 8"), {_start=5, _end=12}) 30 | 31 | -- Check C-style escapes 32 | pat = compile('\\t+') 33 | assertEq(pat:match("abc\t\t foo"), {_start=4, _end=5}) 34 | 35 | pat = compile('\\n+') 36 | assertEq(pat:match("abc\n\n foo"), {_start=4, _end=5}) 37 | 38 | pat = compile('\\r+') 39 | assertEq(pat:match("abc\r\r foo"), {_start=4, _end=5}) 40 | 41 | pat = compile('\\f+') 42 | assertEq(pat:match("abc\f\f foo"), {_start=4, _end=5}) 43 | 44 | pat = compile('\\a+') 45 | assertEq(pat:match("abc\a\a foo"), {_start=4, _end=5}) 46 | 47 | pat = compile('\\e+') 48 | assertEq(pat:match("abc\027\027 foo"), {_start=4, _end=5}) 49 | 50 | -- Check them inside charsets 51 | pat = compile('[ \\t]+') 52 | assertEq(pat:match("abc\t\t tfoo"), {_start=4, _end=6}) 53 | -------------------------------------------------------------------------------- /test/test_regex_backref.lua: 
-------------------------------------------------------------------------------- 1 | local test = require'test' 2 | local assertEq = test.assertEq 3 | local function log(x) test.log(tostring(x) .. "\n") end 4 | local pegex = require('pegex') 5 | local compile = pegex.compile 6 | 7 | local pat 8 | -- The back reference must repeat exactly the text captured by group 1. 9 | pat = compile('([A-Z]+)[a-z]*\\1') 10 | 11 | assertEq(pat:match('ABCfooDEF'), nil) 12 | assertEq(pat:match('ABCfooABC'), {_start=1, _end=9, groups={{1,3}}}) 13 | assertEq(pat:match('ABCfooBCD'), {_start=2, _end=8, groups={{2,3}}}) 14 | -------------------------------------------------------------------------------- /test/test_regex_cap.lua: -------------------------------------------------------------------------------- 1 | local test = require'test' 2 | local assertEq = test.assertEq 3 | local function log(x) test.log(tostring(x) .. "\n") end 4 | local pegex = require('pegex') 5 | local compile = pegex.compile 6 | 7 | local pat 8 | 9 | pat = compile('a(.*)b') 10 | 11 | assertEq(pat:match("axyzb"), {_start=1,_end=5, groups={{2,4}},}) 12 | assertEq(pat:match("axyzbb"), {_start=1,_end=6, groups={{2,5}},}) 13 | -- A repeated group reports the span of its last iteration; with no iteration there is no groups field. 14 | pat = compile('a(foo|bar)*b') 15 | 16 | --log(test.tostring(pegex.parse('a(foo|bar)*b'), '')) 17 | 18 | assertEq(pat:match("ab"), {_start=1,_end=2,}) 19 | assertEq(pat:match("afoob"), {_start=1,_end=5, groups={{2,4}},}) 20 | assertEq(pat:match("afoobarb"), {_start=1,_end=8, groups={{5,7}},}) 21 | 22 | pat = compile('a([a-z]*)z X([0-9]*)Y') 23 | -- An empty capture is reported with its end one less than its start. 24 | assertEq(pat:match('az XY'), {_start=1, _end=5, groups={{2,1}, {5,4}}}) 25 | assertEq(pat:match('aasdfz X123Y'), {_start=1, _end=12, groups={{2,5},{9,11}}}) 26 | 27 | -- Nested groups 28 | pat = compile('a((b)*c)') 29 | 30 | assertEq(pat:match('abc'), {_start=1, _end=3, groups={{2,3}, {2,2}}}) --------------------------------------------------------------------------------