├── .gitattributes ├── default.project.json ├── src ├── .robloxrc ├── init.lua ├── RegEx │ ├── __tests__ │ │ ├── escapeString.lua │ │ └── testoutput1.spec.lua │ └── init.lua ├── __tests__ │ ├── test.spec.lua │ ├── exec.spec.lua │ └── init.spec.lua └── Regexp.global.lua ├── Packages └── .robloxrc ├── .gitignore ├── test-model.project.json ├── wally.toml ├── foreman.toml ├── bin ├── ci.sh ├── spec.lua ├── parseTestFile.lua └── generate-pcre2-tests.lua ├── rotriever.toml ├── CHANGELOG.md ├── selene.toml ├── README.md ├── LICENSE └── testez.toml /.gitattributes: -------------------------------------------------------------------------------- 1 | *.lua linguist-language=Luau 2 | -------------------------------------------------------------------------------- /default.project.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "RobloxLuauRegExp", 3 | "tree": { 4 | "$path": "src" 5 | } 6 | } -------------------------------------------------------------------------------- /src/.robloxrc: -------------------------------------------------------------------------------- 1 | { 2 | "language": { 3 | "mode": "nonstrict" 4 | }, 5 | "lint": { 6 | "*": "enabled" 7 | } 8 | } -------------------------------------------------------------------------------- /Packages/.robloxrc: -------------------------------------------------------------------------------- 1 | { 2 | "language": { 3 | "mode": "nocheck" 4 | }, 5 | "lint": { 6 | "*": "disabled" 7 | } 8 | } -------------------------------------------------------------------------------- /src/init.lua: -------------------------------------------------------------------------------- 1 | local RegExp = require(script["Regexp.global"]) 2 | 3 | export type RegExp = RegExp.RegExp 4 | 5 | return RegExp 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Packages/* 
2 | !Packages/.robloxrc 3 | # let selene auto-generate latest 4 | roblox.toml 5 | rotriever.lock 6 | *.rbxmx 7 | -------------------------------------------------------------------------------- /test-model.project.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "LuauRegExpTestModel", 3 | "tree": { 4 | "$className": "Folder", 5 | "Packages": { 6 | "$path": "Packages", 7 | "RegExp": { 8 | "$path": "src" 9 | } 10 | } 11 | } 12 | } -------------------------------------------------------------------------------- /wally.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "roblox/regexp" 3 | version = "0.2.2" 4 | license = "MIT" 5 | authors = ["Roblox "] 6 | 7 | realm = "shared" 8 | registry = "https://github.com/UpliftGames/wally-index" 9 | repository = "https://github.com/Roblox/luau-regexp" 10 | 11 | exclude = ["**/__tests__/**"] 12 | 13 | [dependencies] 14 | -------------------------------------------------------------------------------- /foreman.toml: -------------------------------------------------------------------------------- 1 | [tools] 2 | selene = { source = "Roblox/Kampfkarren-selene", version = "0.21.0" } 3 | stylua = { source = "Roblox/JohnnyMorganz-StyLua", version = "0.18.1" } 4 | rotrieve = { source = "roblox/rotriever", version = "=0.5.13-alpha.5" } 5 | rbx-aged-cli = { source = "Roblox/rbx-aged-tool", version = "5.8.1" } 6 | wally = { source = "UpliftGames/wally", version = "0.3.2" } 7 | -------------------------------------------------------------------------------- /bin/ci.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | echo "Build project" 6 | rojo build test-model.project.json --output model.rbxmx 7 | echo "Remove .robloxrc from dev dependencies" 8 | find Packages/Dev -name "*.robloxrc" | xargs rm -f 9 | find Packages/_Index -name "*.robloxrc" | xargs rm -f 
10 | echo "Run static analysis" 11 | selene src/init.lua src/__tests__ 12 | stylua -c src/init.lua src/__tests__ 13 | roblox-cli analyze test-model.project.json 14 | echo "Run tests" 15 | roblox-cli run --load.model model.rbxmx --run bin/spec.lua 16 | -------------------------------------------------------------------------------- /bin/spec.lua: -------------------------------------------------------------------------------- 1 | local ProcessService = game:GetService("ProcessService") 2 | local Root = script.Parent.LuauRegExpTestModel 3 | 4 | local Packages = Root.Packages 5 | local TestEZ = require(Root.Packages.Dev.TestEZ) 6 | 7 | -- Run all tests, collect results, and report to stdout. 8 | local result = TestEZ.TestBootstrap:run( 9 | { Packages.RegExp }, 10 | TestEZ.Reporters.TextReporterQuiet 11 | ) 12 | 13 | if result.failureCount == 0 and #result.errors == 0 then 14 | ProcessService:ExitAsync(0) 15 | else 16 | ProcessService:ExitAsync(1) 17 | end 18 | -------------------------------------------------------------------------------- /rotriever.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "RegExp" 3 | version = "0.2.2" 4 | authors = ["#luau"] 5 | description = "A regular expression library for Luau." 
6 | repository = "https://github.com/roblox/luau-regexp-internal" 7 | keywords = ["regex", "regexpr"] 8 | content_root = "src" 9 | files = ["*", "!**/__tests__/**"] 10 | 11 | [config] 12 | registry_index = true 13 | 14 | [dev_dependencies] 15 | JestGlobals = "github.com/roblox/jest-roblox@2.0.1" 16 | TestEZ = "github.com/roblox/jest-roblox@2.0.1" 17 | LuauPolyfill = "github.com/roblox/luau-polyfill@0.2.1" 18 | -------------------------------------------------------------------------------- /src/RegEx/__tests__/escapeString.lua: -------------------------------------------------------------------------------- 1 | local ESCAPES = { 2 | ["\n"] = "n", 3 | ["\r"] = "r", 4 | ["\t"] = "t", 5 | ["\f"] = "f", 6 | ["\a"] = "a", 7 | ["\v"] = "v", 8 | ["\\"] = "\\", 9 | } 10 | local ESCAPE_CHARS = {} 11 | for char in pairs(ESCAPES) do 12 | table.insert(ESCAPE_CHARS, char) 13 | end 14 | local ESCAPE_CLASS = ("[%s%%c]"):format(table.concat(ESCAPE_CHARS, "")) 15 | 16 | local function escapeString(str) 17 | return str:gsub(ESCAPE_CLASS, function(match) 18 | return "\\" .. 
(ESCAPES[match] or ("%03d"):format(match:byte())) 19 | end) 20 | end 21 | 22 | return escapeString 23 | -------------------------------------------------------------------------------- /src/__tests__/test.spec.lua: -------------------------------------------------------------------------------- 1 | return function() 2 | local RegExpModule = script.Parent.Parent 3 | local RegExp = require(RegExpModule) 4 | type RegExp = RegExp.RegExp 5 | 6 | local Packages = RegExpModule.Parent 7 | local JestGlobals = require(Packages.Dev.JestGlobals) 8 | local jestExpect = JestGlobals.expect 9 | 10 | it("returns true when the regex matches", function() 11 | local re: RegExp = RegExp("a") 12 | jestExpect(re:test("a")).toEqual(true) 13 | end) 14 | 15 | it("returns false when the regex does not match", function() 16 | local re: RegExp = RegExp("a") 17 | jestExpect(re:test("b")).toEqual(false) 18 | end) 19 | end 20 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Luau RegExp Changelog 2 | 3 | ## 0.2.0 4 | 5 | - Remove unicode support to minimize package size. 
The existing source files to support unicode were large enough to impact time to parse, time to require and total package size ([#4](https://github.com/Roblox/luau-regexp/pull/4)) 6 | 7 | ## 0.1.3 8 | 9 | - Bump version to uptake fix for test file filtering in cached artifact ([#3](https://github.com/Roblox/luau-regexp/pull/3)) 10 | 11 | ## 0.1.2 12 | 13 | - Export RegExp type ([#2](https://github.com/Roblox/luau-regexp/pull/2)) 14 | 15 | ## 0.1.1 16 | 17 | - Remove tests from packages ([#1](https://github.com/Roblox/luau-regexp/pull/1)) 18 | 19 | ## 0.1.0 20 | 21 | - Initial release 22 | -------------------------------------------------------------------------------- /selene.toml: -------------------------------------------------------------------------------- 1 | std = "roblox+testez" 2 | 3 | [config] 4 | empty_if = { comments_count = true } 5 | unused_variable = { ignore_pattern = "result|ok|^_" } 6 | # this comes up when translating nested try/finally scenarios 7 | shadowing = { ignore_pattern = "result|ok|^_" } 8 | # feature request for this config: https://github.com/Kampfkarren/selene/issues/181 9 | # global_usage = { ignore_pattern = "^__" } 10 | 11 | [rules] 12 | # remove this once the feature request here is implemented: https://github.com/Kampfkarren/selene/issues/181 13 | global_usage = "allow" 14 | unused_variable = "allow" 15 | # remove when the Luau type narrowing issues (and the workarounds) are resolved 16 | shadowing = "allow" 17 | 18 | # remove when this issue is fixed: https://github.com/Kampfkarren/selene/issues/179 19 | if_same_then_else = "allow" 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RegExp for Luau 2 | A regular expression library for Luau. Adapted from a regex implementation by github user Blockzez (https://github.com/Blockzez). 
3 | 4 | ## API 5 | Create a regex by calling the module directly: 6 | `RegExp(pattern: string, flags: string?) -> RegularExpression` 7 | 8 | A resulting RegularExpression has the following methods: 9 | * `RegularExpression:exec(str: string) -> Match` 10 | * `RegularExpression:test(str: string) -> boolean` 11 | 12 | The `Match` object resulting from `exec` has the following fields: 13 | * `[1..n]` - The array portion of the `Match` object contains captured groups 14 | * `n` - The length of the array of resulting captured groups 15 | * `index` - The index in the original string where the match begins 16 | * `input` - The original string passed into `exec` 17 | 18 | ### Flags 19 | The following flags can be provided via the second argument to `RegExp(...)`: 20 | * "i" - ignoreCase 21 | * "g" - global 22 | * "m" - multiline -------------------------------------------------------------------------------- /src/__tests__/exec.spec.lua: -------------------------------------------------------------------------------- 1 | return function() 2 | local RegExpModule = script.Parent.Parent 3 | local RegExp = require(RegExpModule) 4 | type RegExp = RegExp.RegExp 5 | 6 | local Packages = RegExpModule.Parent 7 | local JestGlobals = require(Packages.Dev.JestGlobals) 8 | local jestExpect = JestGlobals.expect 9 | 10 | -- deviation: since we can't have `nil` values in list-like 11 | -- tables, we have to return the total number of matches, so 12 | -- that we can know when to stop iteration 13 | it("returns the number of matches", function() 14 | local re: RegExp = RegExp("abc") 15 | local result = re:exec("abc") 16 | jestExpect(result.n).toEqual(1) 17 | end) 18 | 19 | it("returns the matches starting from index 1", function() 20 | local re: RegExp = RegExp("abc") 21 | local result = re:exec("abc") 22 | jestExpect(result[1]).toEqual("abc") 23 | end) 24 | 25 | it("returns the starting position of the match", function() 26 | local re: RegExp = RegExp("abc") 27 | local result = re:exec("aabc") 28 
| jestExpect(result.index).toEqual(2) 29 | end) 30 | end 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Roblox 4 | 5 | Copyright (c) 2020, 2023 - Blockzez (devforum.roblox.com/u/Blockzez and github.com/Blockzez) 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 
24 | -------------------------------------------------------------------------------- /testez.toml: -------------------------------------------------------------------------------- 1 | [[afterAll.args]] 2 | type = "function" 3 | 4 | [[afterEach.args]] 5 | type = "function" 6 | 7 | [[beforeAll.args]] 8 | type = "function" 9 | 10 | [[beforeEach.args]] 11 | type = "function" 12 | 13 | [[describe.args]] 14 | type = "string" 15 | 16 | [[describe.args]] 17 | type = "function" 18 | 19 | [[describeFOCUS.args]] 20 | type = "string" 21 | 22 | [[describeFOCUS.args]] 23 | type = "function" 24 | 25 | [[describeSKIP.args]] 26 | type = "string" 27 | 28 | [[describeSKIP.args]] 29 | type = "function" 30 | 31 | [[expect.args]] 32 | type = "any" 33 | 34 | [[FIXME.args]] 35 | type = "string" 36 | required = false 37 | 38 | [FOCUS] 39 | args = [] 40 | 41 | [[it.args]] 42 | type = "string" 43 | 44 | [[it.args]] 45 | type = "function" 46 | 47 | [[itFIXME.args]] 48 | type = "string" 49 | 50 | [[itFIXME.args]] 51 | type = "function" 52 | 53 | [[itFOCUS.args]] 54 | type = "string" 55 | 56 | [[itFOCUS.args]] 57 | type = "function" 58 | 59 | [[fit.args]] 60 | type = "string" 61 | 62 | [[fit.args]] 63 | type = "function" 64 | 65 | [[itSKIP.args]] 66 | type = "string" 67 | 68 | [[itSKIP.args]] 69 | type = "function" 70 | 71 | [[xit.args]] 72 | type = "string" 73 | 74 | [[xit.args]] 75 | type = "function" 76 | 77 | [SKIP] 78 | args = [] 79 | 80 | -------------------------------------------------------------------------------- /src/Regexp.global.lua: -------------------------------------------------------------------------------- 1 | local RegEx = require(script.Parent.RegEx) 2 | type Array = { [number]: T } 3 | 4 | type RegExpExecArray = Array & { index: number?, input: string?, n: number } 5 | 6 | export type RegExp = { 7 | exec: (self: RegExp, input: string) -> RegExpExecArray | nil, 8 | test: (self: RegExp, input: string) -> boolean, 9 | } 10 | 11 | local RegExp = {} 12 | local 
RegExpMetatable = { 13 | __index = RegExp, 14 | __tostring = function(self) 15 | return tostring(self._innerRegEx) 16 | end, 17 | } 18 | 19 | function RegExp:exec(str: string): RegExpExecArray | nil 20 | local match = self._innerRegEx:match(str) 21 | if not match then 22 | return nil 23 | end 24 | 25 | local index = match:span() 26 | local groups = match:grouparr() 27 | 28 | local matches = { groups[0] } 29 | for i = 1, groups.n do 30 | matches[i + 1] = groups[i] 31 | end 32 | matches.n = groups.n + 1 33 | matches.index = index 34 | matches.input = str 35 | return matches 36 | end 37 | 38 | function RegExp:test(str: string): boolean 39 | return self:exec(str) ~= nil 40 | end 41 | 42 | local function new(_self, pattern: RegExp | string, flags: string?) 43 | flags = flags or "" 44 | local innerRegEx = RegEx.new(pattern, flags) 45 | local object = { 46 | source = pattern, 47 | ignoreCase = (flags :: string):find("i") ~= nil, 48 | global = (flags :: string):find("g") ~= nil, 49 | multiline = (flags :: string):find("m") ~= nil, 50 | _innerRegEx = innerRegEx, 51 | } 52 | 53 | return setmetatable(object, RegExpMetatable) 54 | end 55 | 56 | -- FIXME: Capture this as a local variable before returning, else a luau bug 57 | -- prevents __call from being understood: https://jira.rbx.com/browse/CLI-40294 58 | local interface = setmetatable(RegExp, { 59 | __call = new, 60 | }) 61 | 62 | return interface 63 | -------------------------------------------------------------------------------- /src/RegEx/__tests__/testoutput1.spec.lua: -------------------------------------------------------------------------------- 1 | return function() 2 | local __tests__ = script.Parent 3 | local testData = require(__tests__["testoutput1.gen"]) 4 | local escapeString = require(__tests__.escapeString) 5 | local RegEx = require(__tests__.Parent) 6 | 7 | local function shortenIfTooLong(str) 8 | if str:len() > 80 then 9 | str = str:sub(1, 76) .. " ..." 
10 | end 11 | return escapeString(str) 12 | end 13 | 14 | for _, case in pairs(testData) do 15 | local message = ("regex `%s`%s"):format( 16 | shortenIfTooLong(case.source), 17 | case.flags == nil and "" or ("with %s flags"):format(case.flags) 18 | ) 19 | describe(message, function() 20 | local regex = nil 21 | beforeEach(function() 22 | regex = RegEx.new(case.source, case.flags) 23 | end) 24 | 25 | for _, testCase in ipairs(case.tests) do 26 | if testCase.matches == nil then 27 | -- using the length in the test name will dedup tests 28 | -- where the match is identical except one ends with `\0` 29 | local testMessage = ("does not match with `%s` (len: %d)"):format( 30 | testCase.input, 31 | testCase.input:len() 32 | ) 33 | it(testMessage, function() 34 | expect(regex:match(testCase.input)).to.equal(nil) 35 | end) 36 | else 37 | describe(("matches with `%s`"):format(testCase.input), function() 38 | local matchResults = nil 39 | beforeEach(function() 40 | matchResults = regex:match(testCase.input) 41 | end) 42 | 43 | for _, match in ipairs(testCase.matches) do 44 | local testMessage = ("match #%d is `%s`"):format( 45 | match.index, 46 | match.match 47 | ) 48 | it(testMessage, function() 49 | expect(matchResults).to.be.ok() 50 | local expectedMatch = match.match 51 | if expectedMatch == "" then 52 | expectedMatch = nil 53 | end 54 | expect(matchResults:group(match.index)).to.equal(expectedMatch) 55 | end) 56 | end 57 | end) 58 | end 59 | end 60 | end) 61 | end 62 | end 63 | -------------------------------------------------------------------------------- /src/__tests__/init.spec.lua: -------------------------------------------------------------------------------- 1 | return function() 2 | local RegExpModule = script.Parent.Parent 3 | local RegExp = require(RegExpModule) 4 | 5 | local Packages = RegExpModule.Parent 6 | local LuauPolyfill = require(Packages.Dev.LuauPolyfill) 7 | local instanceof = LuauPolyfill.instanceof 8 | local JestGlobals = 
require(Packages.Dev.JestGlobals) 9 | local jestExpect = JestGlobals.expect 10 | 11 | describe("ignoreCase", function() 12 | it("has a `ignoreCase` property set to true if the `i` flag is used", function() 13 | jestExpect(RegExp("foo", "i").ignoreCase).toEqual(true) 14 | end) 15 | 16 | it("has a `ignoreCase` property set to false by default", function() 17 | jestExpect(RegExp("foo").ignoreCase).toEqual(false) 18 | end) 19 | end) 20 | 21 | describe("multiline", function() 22 | it("has a `multiline` property set to true if the `m` flag is used", function() 23 | jestExpect(RegExp("foo", "m").multiline).toEqual(true) 24 | end) 25 | 26 | it("has a `multiline` property set to false by default", function() 27 | jestExpect(RegExp("foo").multiline).toEqual(false) 28 | end) 29 | end) 30 | 31 | describe("global", function() 32 | -- deviation: `g` flag not implemented yet 33 | itSKIP("has a `global` property set to true if the `g` flag is used", function() 34 | jestExpect(RegExp("foo", "g").global).toEqual(true) 35 | end) 36 | 37 | -- deviation: `g` flag not implemented yet 38 | itSKIP("has a `global` property set to false by default", function() 39 | jestExpect(RegExp("foo").global).toEqual(false) 40 | end) 41 | end) 42 | 43 | describe("toString", function() 44 | it("has a correct tostring output", function() 45 | jestExpect(tostring(RegExp("pattern"))).toEqual("/pattern/") 46 | end) 47 | 48 | it("has a correct ordering of flags in tostring output", function() 49 | jestExpect(tostring(RegExp("regexp\\d", "mi"))).toEqual("/regexp\\d/im") 50 | end) 51 | end) 52 | 53 | describe("inheritance", function() 54 | it("follows our expectations for inheritance", function() 55 | jestExpect(instanceof(RegExp("test"), RegExp)).toEqual(true) 56 | end) 57 | end) 58 | end 59 | -------------------------------------------------------------------------------- /bin/parseTestFile.lua: -------------------------------------------------------------------------------- 1 | -- this limits the total 
number of regex tests that will be parsed 2 | -- from the pcre2 test file. 3 | local MAX = 100000 4 | 5 | local function parseTestFile(file) 6 | local testCaseList = {} 7 | 8 | local start = file:find("^/") 9 | 10 | local count = 0 11 | while start ~= nil do 12 | count = count + 1 13 | 14 | local ending = file:find("/", start + 1) 15 | local regexSource = file:sub(start + 1, ending - 1) 16 | local endOfRegexSourceLine = file:find("\n", ending + 1) 17 | local flags = file:sub(ending + 1, endOfRegexSourceLine - 1) 18 | 19 | local nextLineStart = endOfRegexSourceLine + 1 20 | local nextLineEnd = file:find("\n", endOfRegexSourceLine + 1) 21 | local line = file:sub(nextLineStart, nextLineEnd - 1) 22 | 23 | local regexTests = {} 24 | local currentTest = nil 25 | 26 | while line ~= "" do 27 | local matchIndex, matchValue = line:match("^([%d ]%d+): ?(.*)$") 28 | if line == "No match" then 29 | currentTest.matches = nil 30 | elseif matchIndex ~= nil then 31 | assert( 32 | currentTest.matches, 33 | "error parsing regex " .. tostring(count) .. " tests: '" .. regexSource .. "'" 34 | ) 35 | table.insert(currentTest.matches, { 36 | index = tonumber(matchIndex), 37 | match = matchValue, 38 | }) 39 | else 40 | if currentTest ~= nil then 41 | table.insert(regexTests, currentTest) 42 | end 43 | currentTest = { 44 | input = line:match("^ *(.+)$"), 45 | matches = {}, 46 | } 47 | end 48 | nextLineStart = nextLineEnd + 1 49 | nextLineEnd = file:find("\n", nextLineStart) 50 | line = file:sub(nextLineStart, nextLineEnd - 1) 51 | end 52 | 53 | if currentTest ~= nil then 54 | table.insert(regexTests, currentTest) 55 | end 56 | 57 | local regexInfo = { 58 | source = regexSource, 59 | flags = flags, 60 | } 61 | 62 | table.insert(testCaseList, { 63 | source = regexSource, 64 | flags = flags, 65 | tests = regexTests, 66 | }) 67 | 68 | if regexInfo.source == nil then 69 | error("\n\nerror at count = " .. count .. 
"\n\n") 70 | end 71 | 72 | start = file:find("\n/", nextLineEnd) 73 | if start then 74 | start = start + 1 75 | end 76 | 77 | if count >= MAX then 78 | break 79 | end 80 | end 81 | 82 | return testCaseList 83 | end 84 | 85 | return parseTestFile 86 | -------------------------------------------------------------------------------- /bin/generate-pcre2-tests.lua: -------------------------------------------------------------------------------- 1 | local parseTestFile = require("bin.parseTestFile") 2 | 3 | --[[ 4 | The next require refers to a pcre2 test data file wrapped as a lua module 5 | that returns a string. These test data files can be found on github: 6 | https://github.com/luvit/pcre2/tree/master/testdata 7 | 8 | Each file that starts with `testoutput` should work; simply take one and 9 | create a lua file that looks like this: 10 | ``` 11 | return [==========================[ 12 | *** content of the test data file, after the comments (lines starting with #) *** 13 | ]==========================] 14 | ``` 15 | 16 | Require the test data file from the project root path. If you put it in the 17 | root, it is simply the filename without the `.lua` extension. 18 | ]] 19 | local testFile = require("src.RegEx.__tests__.testoutput1") 20 | 21 | local testCases = parseTestFile(testFile) 22 | 23 | local luaOutputLines = {} 24 | 25 | local function writeLine(strFormat, ...) 26 | table.insert(luaOutputLines, "\t" .. 
strFormat:format(...)) 27 | end 28 | 29 | local function findNotEscaped(str, char, startIndex) 30 | local foundIndex = str:find(char, startIndex, true) 31 | while foundIndex ~= nil do 32 | local escaped = false 33 | local currentChar = foundIndex - 1 34 | while str:sub(currentChar, currentChar) == "\\" do 35 | escaped = not escaped 36 | currentChar = currentChar - 1 37 | if currentChar == 0 then 38 | break 39 | end 40 | end 41 | if not escaped then 42 | return foundIndex 43 | end 44 | foundIndex = str:find(char, foundIndex + 1, true) 45 | end 46 | return nil 47 | end 48 | 49 | local function removeUnescapeBackslash(str) 50 | local index = 1 51 | index = findNotEscaped(str, "\\", index) 52 | while index ~= nil do 53 | if str:sub(index + 1, index + 1):match("[afnrt%d\\]") then 54 | index = findNotEscaped(str, "\\", index + 1) 55 | else 56 | str = str:sub(1, index) .. str:sub(index) 57 | index = findNotEscaped(str, "\\", index + 2) 58 | end 59 | end 60 | return str 61 | end 62 | 63 | local function quote(str) 64 | return ('"%s"'):format( 65 | str:gsub('"', '\\"'):gsub("\n", "\\n") 66 | ) 67 | end 68 | 69 | local function processInput(str) 70 | str = str 71 | :gsub("\\e", "\\027") 72 | :gsub("\\%$", "$") 73 | :gsub("\\x%x%x?", function(match) 74 | local asciiIndex = tonumber(match:sub(3), 16) 75 | return ("\\%03d"):format(asciiIndex) 76 | end) 77 | 78 | return quote(removeUnescapeBackslash(str)) 79 | end 80 | 81 | local function processMatch(str) 82 | str = str:gsub("\\e", "\\027") 83 | :gsub("\\[^tnrfax\\%d]", function(match) 84 | return "\\" .. 
match 85 | end) 86 | :gsub("\\x%x%x", function(match) 87 | local asciiIndex = tonumber(match:sub(3), 16) 88 | return ("\\%03d"):format(asciiIndex) 89 | end) 90 | return quote(removeUnescapeBackslash(str)) 91 | end 92 | 93 | for i, case in ipairs(testCases) do 94 | local totalTestCases = #case.tests 95 | if totalTestCases > 0 then 96 | writeLine("{") 97 | 98 | writeLine("\tsource = [==[%s]==],", case.source) 99 | if case.flags and case.flags ~= "" then 100 | writeLine("\tflags = %q,", case.flags) 101 | end 102 | 103 | writeLine("\ttests = {") 104 | for _, test in ipairs(case.tests) do 105 | if test.matches then 106 | writeLine("\t\t{") 107 | writeLine("\t\t\tinput = %s,", processInput(test.input)) 108 | if #test.matches == 1 then 109 | writeLine("\t\t\tmatches = {{ index = %d, match = %s }},", 110 | test.matches[1].index, 111 | processMatch(test.matches[1].match) 112 | ) 113 | else 114 | writeLine("\t\t\tmatches = {") 115 | for _, match in ipairs(test.matches) do 116 | writeLine( 117 | "\t\t\t\t{ index = %d, match = %s },", 118 | match.index, 119 | processMatch(match.match) 120 | ) 121 | end 122 | writeLine("\t\t\t}") 123 | end 124 | writeLine("\t\t},") 125 | else 126 | writeLine("\t\t{ input = %s },", processInput(test.input)) 127 | end 128 | end 129 | writeLine("\t},") 130 | 131 | writeLine("},") 132 | else 133 | print(("no test case found for #%d: %s"):format(i, case.source)) 134 | end 135 | end 136 | 137 | local file = io.open("testoutput1.gen.lua", "w+") 138 | file:write("return {\n") 139 | 140 | file:write(table.concat(luaOutputLines, "\n")) 141 | 142 | file:write("\n}\n") 143 | 144 | file:close() 145 | -------------------------------------------------------------------------------- /src/RegEx/init.lua: -------------------------------------------------------------------------------- 1 | --!nolint 2 | --!nocheck 3 | --[[ 4 | PCRE2-based RegEx implemention for Luau 5 | Version 1.0.0a2 (2020) 6 | Expat Licence 7 | Copyright © 2020, 2023 - Blockzez 
(devforum.roblox.com/u/Blockzez and github.com/Blockzez) 8 | All rights reserved. 9 | 10 | Permission is hereby granted, free of charge, to any person obtaining a copy 11 | of this software and associated documentation files (the "Software"), to deal 12 | in the Software without restriction, including without limitation the rights 13 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 | copies of the Software, and to permit persons to whom the Software is 15 | furnished to do so, subject to the following conditions: 16 | 17 | The above copyright notice and this permission notice shall be included in all 18 | copies or substantial portions of the Software. 19 | 20 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 26 | SOFTWARE. 
27 | ]] 28 | --[[ Settings ]]-- 29 | -- You can change them here 30 | local options = { 31 | -- The maximum cache size for regex so the patterns are cached so it doesn't recompile the pattern 32 | -- The only accepted values are number values >= 0, strings that can be automatically coerced to numbers that are >= 0, false and nil 33 | -- Do note that empty regex patterns (comment-only patterns included) are never cached regardless 34 | -- The default is 256 35 | cacheSize = 256, 36 | 37 | -- A boolean that determines whether this uses unicode data 38 | -- If this value evaluates to false, you can remove _unicodechar_category, _scripts and _xuc safely and it'll now error if: 39 | -- - You try to compile a RegEx with unicode flag 40 | -- - You try to use the \p pattern 41 | -- The default is true 42 | unicodeData = false, 43 | }; 44 | 45 | -- 46 | local u_categories = options.unicodeData and require(script:WaitForChild("_unicodechar_category")); 47 | local chr_scripts = options.unicodeData and require(script:WaitForChild("_scripts")); 48 | local xuc_chr = options.unicodeData and require(script:WaitForChild("_xuc")); 49 | local proxy = setmetatable({ }, { __mode = 'k' }); 50 | local re, re_m, match_m = { }, { }, { }; 51 | local lockmsg; 52 | 53 | --[[ Functions ]]-- 54 | local function to_str_arr(self, init) 55 | if init then 56 | self = string.sub(self, utf8.offset(self, init)); 57 | end; 58 | local len = utf8.len(self); 59 | if len <= 1999 then 60 | return { n = len, s = self, utf8.codepoint(self, 1, #self) }; 61 | end; 62 | local clen = math.ceil(len / 1999); 63 | local ret = table.create(len); 64 | local p = 1; 65 | for i = 1, clen do 66 | local c = table.pack(utf8.codepoint(self, utf8.offset(self, i * 1999 - 1998), utf8.offset(self, i * 1999 - (i == clen and 1998 - ((len - 1) % 1999 + 1) or - 1)) - 1)); 67 | table.move(c, 1, c.n, p, ret); 68 | p += c.n; 69 | end; 70 | ret.s, ret.n = self, len; 71 | return ret; 72 | end; 73 | 74 | local function from_str_arr(self) 75 
| local len = self.n or #self; 76 | if len <= 7997 then 77 | return utf8.char(table.unpack(self)); 78 | end; 79 | local clen = math.ceil(len / 7997); 80 | local r = table.create(clen); 81 | for i = 1, clen do 82 | r[i] = utf8.char(table.unpack(self, i * 7997 - 7996, i * 7997 - (i == clen and 7997 - ((len - 1) % 7997 + 1) or 0))); 83 | end; 84 | return table.concat(r); 85 | end; 86 | 87 | local function utf8_sub(self, i, j) 88 | j = utf8.offset(self, j); 89 | return string.sub(self, utf8.offset(self, i), j and j - 1); 90 | end; 91 | 92 | -- 93 | local flag_map = { 94 | a = 'anchored', i = 'caseless', m = 'multiline', s = 'dotall', u = 'unicode', U = 'ungreedy', x ='extended', 95 | }; 96 | 97 | local posix_class_names = { 98 | alnum = true, alpha = true, ascii = true, blank = true, cntrl = true, digit = true, graph = true, lower = true, print = true, punct = true, space = true, upper = true, word = true, xdigit = true, 99 | }; 100 | 101 | local escape_chars = { 102 | -- grouped 103 | -- digit, spaces and words 104 | [0x44] = { "class", "digit", true }, [0x53] = { "class", "space", true }, [0x57] = { "class", "word", true }, 105 | [0x64] = { "class", "digit", false }, [0x73] = { "class", "space", false }, [0x77] = { "class", "word", false }, 106 | -- horizontal/vertical whitespace and newline 107 | [0x48] = { "class", "blank", true }, [0x56] = { "class", "vertical_tab", true }, 108 | [0x68] = { "class", "blank", false }, [0x76] = { "class", "vertical_tab", false }, 109 | [0x4E] = { 0x4E }, [0x52] = { 0x52 }, 110 | 111 | -- not grouped 112 | [0x42] = 0x08, 113 | [0x6E] = 0x0A, [0x72] = 0x0D, [0x74] = 0x09, 114 | }; 115 | 116 | local b_escape_chars = { 117 | -- word boundary and not word boundary 118 | [0x62] = { 0x62, { "class", "word", false } }, [0x42] = { 0x42, { "class", "word", false } }, 119 | 120 | -- keep match out 121 | [0x4B] = { 0x4B }, 122 | 123 | -- start & end of string 124 | [0x47] = { 0x47 }, [0x4A] = { 0x4A }, [0x5A] = { 0x5A }, [0x7A] = { 0x7A }, 125 
};

-- Set of recognised Unicode category names. The two-letter/one-letter entries
-- are general categories; the three-letter X* entries are the special
-- properties that tkn_char_match evaluates explicitly ("category" tokens).
local valid_categories = {
	C = true, Cc = true, Cf = true, Cn = true, Co = true, Cs = true,
	L = true, Ll = true, Lm = true, Lo = true, Lt = true, Lu = true,
	M = true, Mc = true, Me = true, Mn = true,
	N = true, Nd = true, Nl = true, No = true,
	P = true, Pc = true, Pd = true, Pe = true, Pf = true, Pi = true, Po = true, Ps = true,
	S = true, Sc = true, Sk = true, Sm = true, So = true,
	Z = true, Zl = true, Zp = true, Zs = true,

	Xan = true, Xps = true, Xsp = true, Xuc = true, Xwd = true,
};

-- ASCII punctuation code points; consulted by tkn_char_match for the
-- "punct" class when the unicode flag is off.
local class_ascii_punct = {
	[0x21] = true, [0x22] = true, [0x23] = true, [0x24] = true, [0x25] = true, [0x26] = true, [0x27] = true, [0x28] = true, [0x29] = true, [0x2A] = true, [0x2B] = true, [0x2C] = true, [0x2D] = true, [0x2E] = true, [0x2F] = true,
	[0x3A] = true, [0x3B] = true, [0x3C] = true, [0x3D] = true, [0x3E] = true, [0x3F] = true, [0x40] = true, [0x5B] = true, [0x5C] = true, [0x5D] = true, [0x5E] = true, [0x5F] = true, [0x60] = true, [0x7B] = true, [0x7C] = true,
	[0x7D] = true, [0x7E] = true,
};

-- Singleton token tables for `$`, `.`, `^` and `|`. `alternation` is compared
-- by identity in find_alternation, so it must stay a single shared table.
local end_str = { 0x24 };
local dot = { 0x2E };
local beginning_str = { 0x5E };
local alternation = { 0x7C };

-- Wraps `func` with argument validation and unwrapping of proxy objects.
--
-- re_type == "Match": the wrapper requires argument #1 to be a Match proxy,
--   replaces it with its internal record and, for "group"/"span", defaults
--   the group id (argument #2) to 0.
-- otherwise (RegEx methods): the wrapper requires at least two arguments.
--   Argument #1 may be a RegEx proxy, or a string/number that is compiled
--   with re.fromstring. The subject string (argument #3 for "sub", argument
--   #2 for every other method) must be a string or number (numbers are
--   stringified). For methods other than "sub"/"split", argument #3 is an
--   optional start index: it is rounded, negative values are counted from
--   the end using #arg1 (byte length), and non-negative values are clamped
--   to at least 1. The caller's original first argument is forwarded as
--   `source` for the methods that build Match objects.
--
-- All validation errors are raised at level 2, i.e. at the wrapper's caller.
local function check_re(re_type, name, func)
	if re_type == "Match" then
		return function(...)
			local arg_n = select('#', ...);
			if arg_n < 1 then
				error("missing argument #1 (Match expected)", 2);
			end;
			local arg0, arg1 = ...;
			if not (proxy[arg0] and proxy[arg0].name == "Match") then
				error(string.format("invalid argument #1 to %q (Match expected, got %s)", name, typeof(arg0)), 2);
			else
				arg0 = proxy[arg0];
			end;
			if name == "group" or name == "span" then
				if arg1 == nil then
					arg1 = 0;
				end;
			end;
			return func(arg0, arg1);
		end;
	end;
	return function(...)
		local arg_n = select('#', ...);
		if arg_n < 1 then
			error("missing argument #1 (RegEx expected)", 2);
		elseif arg_n < 2 then
			error("missing argument #2 (string expected)", 2);
		end;
		local arg0, arg1, arg2, arg3, arg4, arg5 = ...;
		if not (proxy[arg0] and proxy[arg0].name == "RegEx") then
			if type(arg0) ~= "string" and type(arg0) ~= "number" then
				error(string.format("invalid argument #1 to %q (RegEx expected, got %s)", name, typeof(arg0)), 2);
			end;
			arg0 = re.fromstring(arg0);
		end;
		-- Validate the subject string regardless of how the pattern was given.
		-- (Previously this was chained with `elseif` to the check above and so
		-- was skipped whenever the pattern was passed as a string/number.)
		if name == "sub" then
			if type(arg2) == "number" then
				arg2 ..= '';
			elseif type(arg2) ~= "string" then
				error(string.format("invalid argument #3 to 'sub' (string expected, got %s)", typeof(arg2)), 2);
			end;
		elseif type(arg1) == "number" then
			arg1 ..= '';
		elseif type(arg1) ~= "string" then
			error(string.format("invalid argument #2 to %q (string expected, got %s)", name, typeof(arg1)), 2);
		end;
		if name ~= "sub" and name ~= "split" then
			local init_type = typeof(arg2);
			if init_type ~= 'nil' then
				arg2 = tonumber(arg2);
				if not arg2 then
					error(string.format("invalid argument #3 to %q (number expected, got %s)", name, init_type), 2);
				elseif arg2 < 0 then
					arg2 = #arg1 + math.floor(arg2 + 0.5) + 1;
				else
					arg2 = math.max(math.floor(arg2 + 0.5), 1);
				end;
			end;
		end;
		arg0 = proxy[arg0];
		-- `arg = ...` keeps only the first vararg: the pattern exactly as the
		-- caller passed it. "matchall" is the name the iterator is registered
		-- under in this file; "matchiter" is kept for backward compatibility.
		if name == "match" or name == "matchiter" or name == "matchall" then
			arg3 = ...;
		elseif name == "sub" then
			arg5 = ...;
		end;
		return func(arg0, arg1, arg2, arg3, arg4, arg5);
	end;
end;

--[[ Matches ]]--
-- __tostring for Match proxies: prints the inclusive range of the whole
-- match (spans[0] stores an exclusive end) followed by the matched text.
local function match_tostr(self)
	local spans = proxy[self].spans;
	local s_start, s_end = spans[0][1], spans[0][2];
	if s_end <= s_start then
		return string.format("Match (%d..%d, empty)", s_start, s_end - 1);
	end;
	return string.format("Match (%d..%d): %s", s_start, s_end - 1, utf8_sub(spans.input, s_start, s_end));
end;

-- Builds a Match proxy around `span_arr` (the table returned by re_rawfind).
-- `span_arr` is mutated: it gains `source` (the regex as given by the caller)
-- and `input` (the subject string) and becomes the proxy's __index table,
-- falling back to match_m for methods.
local function new_match(span_arr, group_id, re, str)
	span_arr.source, span_arr.input = re, str;
	local object = newproxy(true);
	local object_mt = getmetatable(object);
	object_mt.__metatable = lockmsg;
	object_mt.__index = setmetatable(span_arr, match_m);
	object_mt.__tostring = match_tostr;

	proxy[object] = { name = "Match", spans = span_arr, group_id = group_id };
	return object;
end;

-- Returns the text of one group (numeric index or name; 0 = whole match),
-- or nil when the group did not participate / does not exist.
match_m.group = check_re('Match', 'group', function(self, group_id)
	local span = self.spans[type(group_id) == "number" and group_id or self.group_id[group_id]];
	if not span then
		return nil;
	end;
	return utf8_sub(self.spans.input, span[1], span[2]);
end);

-- Returns the inclusive start/end positions of one group, or nil.
match_m.span = check_re('Match', 'span', function(self, group_id)
	local span = self.spans[type(group_id) == "number" and group_id or self.group_id[group_id]];
	if not span then
		return nil;
	end;
	return span[1], span[2] - 1;
end);

-- Returns every capture group's text as multiple values (nil for groups
-- without a span). When the pattern has no groups, returns the whole match.
match_m.groups = check_re('Match', 'groups', function(self)
	local spans = self.spans;
	if spans.n > 0 then
		local ret = table.create(spans.n);
		for i = 0, spans.n do
			local v = spans[i];
			if v then
				ret[i] = utf8_sub(spans.input, v[1], v[2]);
			end;
		end;
		return table.unpack(ret, 1, spans.n);
	end;
	return utf8_sub(spans.input, spans[0][1], spans[0][2]);
end);

-- Returns a table mapping each group name to its matched text; names whose
-- group has no span are omitted.
match_m.groupdict = check_re('Match', 'groupdict', function(self)
	local spans = self.spans;
	local ret = { };
	for k, v in pairs(self.group_id) do
		v = spans[v];
		if v then
			ret[k] = utf8_sub(spans.input, v[1], v[2]);
		end;
	end;
	return ret;
end);

-- Returns an array of group texts indexed 0..n (0 = whole match) with the
-- group count in `.n`. Registered under its own name so that argument errors
-- mention 'grouparr' (it was previously labelled 'groupdict').
match_m.grouparr = check_re('Match', 'grouparr', function(self)
	local spans = self.spans;
	local ret = table.create(spans.n);
	for i = 0, spans.n do
		local v = spans[i];
		if v then
			ret[i] = utf8_sub(spans.input, v[1], v[2]);
		end;
	end;
	ret.n = spans.n;
	return ret;
end);

--
-- Numeric ids for the newline conventions; is_newline switches on
-- verb_flags.newline using these values.
local line_verbs = {
	CR = 0, LF = 1, CRLF = 2, ANYRLF = 3, ANY = 4, NUL = 5,
};

-- Reports whether the code point at str_arr[i] ends a line under the newline
-- convention selected by verb_flags.newline (see line_verbs). Any value not
-- handled explicitly, including LF (1), falls through to a plain line feed test.
local function is_newline(str_arr, i, verb_flags)
	local line_verb_n = verb_flags.newline;
	local chr = str_arr[i];
	if line_verb_n == 0 then
		-- carriage return
		return chr == 0x0D;
	elseif line_verb_n == 2 then
		-- carriage return followed by line feed: i must sit on the LF and the
		-- previous code point must be CR (0x0D; this used to test 0x20, a space)
		return chr == 0x0A and str_arr[i - 1] == 0x0D;
	elseif line_verb_n == 3 then
		-- any of the above
		return chr == 0x0A or chr == 0x0D;
	elseif line_verb_n == 4 then
		-- any of Unicode newlines
		return chr == 0x0A or chr == 0x0B or chr == 0x0C or chr == 0x0D or chr == 0x85 or chr == 0x2028 or chr == 0x2029;
	elseif line_verb_n == 5 then
		-- null
		return chr == 0;
	end;
	-- linefeed
	return chr == 0x0A;
end;


-- Tests whether the single code point str_arr[i] satisfies `tkn_part`.
-- Returns false when i is outside the array. `tkn_part` is one of:
--   * a number                          - literal code point
--   * { "charset", negate, items }      - any item matches (recursively)
--   * { "range", lo, hi }               - inclusive code point range
--   * { "class", name, negate }         - POSIX-style / escape class
--   * { "category", negate, name }      - Unicode category or X* property
--   * { 0x2E } / { 0x4E } / { 0x52 }    - `.`, \N and \R
-- With flags.ignoreCase, ASCII lower-case input is folded to upper case
-- before comparing. Results are truthy/falsy rather than strict booleans.
local function tkn_char_match(tkn_part, str_arr, i, flags, verb_flags)
	local chr = str_arr[i];
	if not chr then
		return false;
	elseif flags.ignoreCase and chr >= 0x61 and chr <= 0x7A then
		chr -= 0x20;
	end;
	if type(tkn_part) == "number" then
		return tkn_part == chr;
	elseif tkn_part[1] == "charset" then
		for _, v in ipairs(tkn_part[3]) do
			if tkn_char_match(v, str_arr, i, flags, verb_flags) then
				return not tkn_part[2];
			end;
		end;
		return tkn_part[2];
	elseif tkn_part[1] == "range" then
		-- chr has already been upper-cased; also try its lower-case form so
		-- that ranges written in lower case still match.
		return chr >= tkn_part[2] and chr <= tkn_part[3] or flags.ignoreCase and chr >= 0x41 and chr <= 0x5A and (chr + 0x20) >= tkn_part[2] and (chr + 0x20) <= tkn_part[3];
	elseif tkn_part[1] == "class" then
		local char_class = tkn_part[2];
		local negate = tkn_part[3];
		local match = false;
		-- if and elseifs :(
		-- Might make these into tables in the future
		if char_class == "xdigit" then
			match = chr >= 0x30 and chr <= 0x39 or chr >= 0x41 and chr <= 0x46 or chr >= 0x61 and chr <= 0x66;
		elseif char_class == "ascii" then
			match = chr <= 0x7F;
		-- cannot be accessed through POSIX classes
		elseif char_class == "vertical_tab" then
			match = chr >= 0x0A and chr <= 0x0D or chr == 0x2028 or chr == 0x2029;
		--
		elseif flags.unicode then
			local current_category = u_categories[chr] or 'Cn';
			local first_category = current_category:sub(1, 1);
			if char_class == "alnum" then
				match = first_category == 'L' or current_category == 'Nl' or current_category == 'Nd';
			elseif char_class == "alpha" then
				match = first_category == 'L' or current_category == 'Nl';
			elseif char_class == "blank" then
				match = current_category == 'Zs' or chr == 0x09;
			elseif char_class == "cntrl" then
				match = current_category == 'Cc';
			elseif char_class == "digit" then
				match = current_category == 'Nd';
			elseif char_class == "graph" then
				-- Visible glyphs: everything except separators (Z*) and the
				-- "other" categories (C*). Punctuation is graphical, as in the
				-- ASCII branch below (0x21..0x7E); this used to exclude 'P'
				-- and let separators through.
				match = first_category ~= 'Z' and first_category ~= 'C';
			elseif char_class == "lower" then
				match = current_category == 'Ll';
			elseif char_class == "print" then
				match = first_category ~= 'C';
			elseif char_class == "punct" then
				match = first_category == 'P';
			elseif char_class == "space" then
				match = first_category == 'Z' or chr >= 0x09 and chr <= 0x0D;
			elseif char_class == "upper" then
				match = current_category == 'Lu';
			elseif char_class == "word" then
				match = first_category == 'L' or current_category == 'Nl' or current_category == 'Nd' or current_category == 'Pc';
			end;
		elseif char_class == "alnum" then
			match = chr >= 0x30 and chr <= 0x39 or chr >= 0x41 and chr <= 0x5A or chr >= 0x61 and chr <= 0x7A;
		elseif char_class == "alpha" then
			match = chr >= 0x41 and chr <= 0x5A or chr >= 0x61 and chr <= 0x7A;
		elseif char_class == "blank" then
			match = chr == 0x09 or chr == 0x20;
		elseif char_class == "cntrl" then
			match = chr <= 0x1F or chr == 0x7F;
		elseif char_class == "digit" then
			match = chr >= 0x30 and chr <= 0x39;
		elseif char_class == "graph" then
			match = chr >= 0x21 and chr <= 0x7E;
		elseif char_class == "lower" then
			match = chr >= 0x61 and chr <= 0x7A;
		elseif char_class == "print" then
			match = chr >= 0x20 and chr <= 0x7E;
		elseif char_class == "punct" then
			match = class_ascii_punct[chr];
		elseif char_class == "space" then
			match = chr >= 0x09 and chr <= 0x0D or chr == 0x20;
		elseif char_class == "upper" then
			match = chr >= 0x41 and chr <= 0x5A;
		elseif char_class == "word" then
			match = chr >= 0x30 and chr <= 0x39 or chr >= 0x41 and chr <= 0x5A or chr >= 0x61 and chr <= 0x7A or chr == 0x5F;
		end;
		if negate then
			return not match;
		end;
		return match;
	elseif tkn_part[1] == "category" then
		local chr_category = u_categories[chr] or 'Cn';
		local category_v = tkn_part[3];
		local category_len = #category_v;
		if category_len == 3 then
			-- special three-letter properties (Xan, Xwd, Xps, Xsp, Xuc)
			local match = false;
			if category_v == "Xan" or category_v == "Xwd" then
				match = chr_category:find("^[LN]") or category_v == "Xwd" and chr == 0x5F;
			elseif category_v == "Xps" or category_v == "Xsp" then
				match = chr_category:sub(1, 1) == 'Z' or chr >= 0x09 and chr <= 0x0D;
			elseif category_v == "Xuc" then
				match = tkn_char_match(xuc_chr, str_arr, i, flags, verb_flags);
			end;
			if tkn_part[2] then
				return not match;
			end
			return match;
		elseif chr_category:sub(1, category_len) == category_v then
			-- prefix comparison: a one-letter name matches the whole major class
			return not tkn_part[2];
		end;
		return tkn_part[2];
	elseif tkn_part[1] == 0x2E then
		return flags.dotAll or not is_newline(str_arr, i, verb_flags);
	elseif tkn_part[1] == 0x4E then
		return not is_newline(str_arr, i, verb_flags);
	elseif tkn_part[1] == 0x52 then
		if verb_flags.newline_seq == 0 then
			-- CR, LF or CRLF
			return chr == 0x0A or chr == 0x0D;
		end;
		-- any unicode newline
		return chr == 0x0A or chr == 0x0B or chr == 0x0C or chr == 0x0D or chr == 0x85 or chr == 0x2028 or chr == 0x2029;
	end;
	return false;
end;

-- Scans `token` forward from index `i` for the next `|` that belongs to the
-- current nesting level. Nested groups (bare or quantified) are skipped by
-- jumping to their recorded closing index. Returns the index of the
-- alternation token, or nil when the end of the token list or a closing
-- parenthesis is reached first.
-- When `count` is a number, the width of every token skipped is added to it
-- (groups contribute their `.count`, quantifiers their v[3]) and the running
-- total is returned as the second value; when `count` is nil/false it is
-- passed back unchanged.
local function find_alternation(token, i, count)
	while true do
		local v = token[i];
		local is_table = type(v) == "table";
		if v == alternation then
			return i, count;
		elseif is_table and v[1] == 0x28 then
			if count then
				count += v.count;
			end;
			i = v[3];
		elseif is_table and v[1] == "quantifier" and type(v[5]) == "table" and v[5][1] == 0x28 then
			if count then
				count += v[5].count;
			end;
			i = v[5][3];
		elseif not v or is_table and v[1] == 0x29 then
			return nil, count;
		elseif count then
			if is_table and v[1] == "quantifier" then
				count += v[3];
			else
				count += 1;
			end;
		end;
		i += 1;
	end;
end;

-- Backtracking matcher. Walks `token` against `str_arr` starting at index
-- `init`, keeping a stack of backtrack states (newest first) in `states`.
-- When `as_bool` is set the result is true/false; otherwise it is nil on
-- failure or a span table on success.
local function re_rawfind(token, str_arr, init, flags, verb_flags, as_bool)
	local tkn_i, str_i, start_i = 0, init, init;
	local states = { };
	while tkn_i do
		if tkn_i == 0 then
			-- (re)starting an attempt: register the first top-level alternative
			tkn_i += 1;
			local next_alt = find_alternation(token, tkn_i);
			if next_alt then
				table.insert(states, 1, { "alternation", next_alt, str_i });
			end;
			continue;
		end;
		local ctkn = token[tkn_i];
		local tkn_type = type(ctkn) == "table" and ctkn[1];
		if not ctkn then
			break;
		elseif ctkn == "ACCEPT" then
			local not_lookaround = true;
			local close_i = tkn_i;
			repeat
				close_i += 1;
				local is_table = type(token[close_i]) == "table";
				local close_i_tkn = token[close_i];
501 | if is_table and (close_i_tkn[1] == 0x28 or close_i_tkn[1] == "quantifier" and type(close_i_tkn[5]) == "table" and close_i_tkn[5][1] == 0x28) then 502 | close_i = close_i_tkn[1] == "quantifier" and close_i_tkn[5][3] or close_i_tkn[3]; 503 | elseif is_table and close_i_tkn[1] == 0x29 and (close_i_tkn[4] == 0x21 or close_i_tkn[4] == 0x3D) then 504 | not_lookaround = false; 505 | tkn_i = close_i; 506 | break; 507 | end; 508 | until not close_i_tkn; 509 | if not_lookaround then 510 | break; 511 | end; 512 | elseif ctkn == "PRUNE" or ctkn == "SKIP" then 513 | table.insert(states, 1, { ctkn, str_i }); 514 | tkn_i += 1; 515 | elseif tkn_type == 0x28 then 516 | table.insert(states, 1, { "group", tkn_i, str_i, nil, ctkn[2], ctkn[3], ctkn[4] }); 517 | tkn_i += 1; 518 | local next_alt, count = find_alternation(token, tkn_i, (ctkn[4] == 0x21 or ctkn[4] == 0x3D) and ctkn[5] and 0); 519 | if next_alt then 520 | table.insert(states, 1, { "alternation", next_alt, str_i }); 521 | end; 522 | if count then 523 | str_i -= count; 524 | end; 525 | elseif tkn_type == 0x29 and ctkn[4] ~= 0x21 then 526 | if ctkn[4] == 0x21 or ctkn[4] == 0x3D then 527 | while true do 528 | local selected_match_start; 529 | local selected_state = table.remove(states, 1); 530 | if selected_state[1] == "group" and selected_state[2] == ctkn[3] then 531 | if (ctkn[4] == 0x21 or ctkn[4] == 0x3D) and not ctkn[5] then 532 | str_i = selected_state[3]; 533 | end; 534 | if selected_match_start then 535 | table.insert(states, 1, selected_match_start); 536 | end; 537 | break; 538 | elseif selected_state[1] == "matchStart" and not selected_match_start and ctkn[4] == 0x3D then 539 | selected_match_start = selected_state; 540 | end; 541 | end; 542 | elseif ctkn[4] == 0x3E then 543 | repeat 544 | local selected_state = table.remove(states, 1); 545 | until not selected_state or selected_state[1] == "group" and selected_state[2] == ctkn[3]; 546 | else 547 | for i, v in ipairs(states) do 548 | if v[1] == "group" and v[2] 
== ctkn[3] then 549 | if v.jmp then 550 | -- recursive match 551 | tkn_i = v.jmp; 552 | end; 553 | v[4] = str_i; 554 | if v[7] == "quantifier" and v[10] + 1 < v[9] then 555 | if token[ctkn[3]][4] ~= "lazy" or v[10] + 1 < v[8] then 556 | tkn_i = ctkn[3]; 557 | end; 558 | local ctkn1 = token[ctkn[3]]; 559 | local new_group = { "group", v[2], str_i, nil, ctkn1[5][2], ctkn1[5][3], "quantifier", ctkn1[2], ctkn1[3], v[10] + 1, v[11], ctkn1[4] }; 560 | table.insert(states, 1, new_group); 561 | if v[11] then 562 | table.insert(states, 1, { "alternation", v[11], str_i }); 563 | end; 564 | end; 565 | break; 566 | end; 567 | end; 568 | end; 569 | tkn_i += 1; 570 | elseif tkn_type == 0x4B then 571 | table.insert(states, 1, { "matchStart", str_i }); 572 | tkn_i += 1; 573 | elseif tkn_type == 0x7C then 574 | local close_i = tkn_i; 575 | repeat 576 | close_i += 1; 577 | local is_table = type(token[close_i]) == "table"; 578 | local close_i_tkn = token[close_i]; 579 | if is_table and (close_i_tkn[1] == 0x28 or close_i_tkn[1] == "quantifier" and type(close_i_tkn[5]) == "table" and close_i_tkn[5][1] == 0x28) then 580 | close_i = close_i_tkn[1] == "quantifier" and close_i_tkn[5][3] or close_i_tkn[3]; 581 | end; 582 | until is_table and close_i_tkn[1] == 0x29 or not close_i_tkn; 583 | if token[close_i] then 584 | for _, v in ipairs(states) do 585 | if v[1] == "group" and v[6] == close_i then 586 | tkn_i = v[6]; 587 | break; 588 | end; 589 | end; 590 | else 591 | tkn_i = close_i; 592 | end; 593 | elseif tkn_type == "recurmatch" then 594 | table.insert(states, 1, { "group", ctkn[3], str_i, nil, nil, token[ctkn[3]][3], nil, jmp = tkn_i }); 595 | tkn_i = ctkn[3] + 1; 596 | local next_alt, count = find_alternation(token, tkn_i); 597 | if next_alt then 598 | table.insert(states, 1, { "alternation", next_alt, str_i }); 599 | end; 600 | else 601 | local match; 602 | if ctkn == "FAIL" then 603 | match = false; 604 | elseif tkn_type == 0x29 then 605 | repeat 606 | local selected_state = 
table.remove(states, 1); 607 | until selected_state[1] == "group" and selected_state[2] == ctkn[3]; 608 | elseif tkn_type == "quantifier" then 609 | if type(ctkn[5]) == "table" and ctkn[5][1] == 0x28 then 610 | local next_alt = find_alternation(token, tkn_i + 1); 611 | if next_alt then 612 | table.insert(states, 1, { "alternation", next_alt, str_i }); 613 | end; 614 | table.insert(states, next_alt and 2 or 1, { "group", tkn_i, str_i, nil, ctkn[5][2], ctkn[5][3], "quantifier", ctkn[2], ctkn[3], 0, next_alt, ctkn[4] }); 615 | if ctkn[4] == "lazy" and ctkn[2] == 0 then 616 | tkn_i = ctkn[5][3]; 617 | end; 618 | match = true; 619 | else 620 | local start_i, end_i; 621 | local pattern_count = 1; 622 | local is_backref = type(ctkn[5]) == "table" and ctkn[5][1] == "backref"; 623 | if is_backref then 624 | pattern_count = 0; 625 | local group_n = ctkn[5][2]; 626 | for _, v in ipairs(states) do 627 | if v[1] == "group" and v[5] == group_n then 628 | start_i, end_i = v[3], v[4]; 629 | pattern_count = end_i - start_i; 630 | break; 631 | end; 632 | end; 633 | end; 634 | local min_max_i = str_i + ctkn[2] * pattern_count; 635 | local mcount = 0; 636 | while mcount < ctkn[3] do 637 | if is_backref then 638 | if start_i and end_i then 639 | local org_i = str_i; 640 | if utf8_sub(str_arr.s, start_i, end_i) ~= utf8_sub(str_arr.s, org_i, str_i + pattern_count) then 641 | break; 642 | end; 643 | else 644 | break; 645 | end; 646 | elseif not tkn_char_match(ctkn[5], str_arr, str_i, flags, verb_flags) then 647 | break; 648 | end; 649 | str_i += pattern_count; 650 | mcount += 1; 651 | end; 652 | match = mcount >= ctkn[2]; 653 | if match and ctkn[4] ~= "possessive" then 654 | if ctkn[4] == "lazy" then 655 | min_max_i, str_i = str_i, min_max_i; 656 | end; 657 | table.insert(states, 1, { "quantifier", tkn_i, str_i, math.min(min_max_i, str_arr.n + 1), (ctkn[4] == "lazy" and 1 or -1) * pattern_count }); 658 | end; 659 | end; 660 | elseif tkn_type == "backref" then 661 | local start_i, end_i; 
662 | local group_n = ctkn[2]; 663 | for _, v in ipairs(states) do 664 | if v[1] == "group" and v[5] == group_n then 665 | start_i, end_i = v[3], v[4]; 666 | break; 667 | end; 668 | end; 669 | if start_i and end_i then 670 | local org_i = str_i; 671 | str_i += end_i - start_i; 672 | match = utf8_sub(str_arr.s, start_i, end_i) == utf8_sub(str_arr.s, org_i, str_i); 673 | end; 674 | else 675 | local chr = str_arr[str_i]; 676 | if tkn_type == 0x24 or tkn_type == 0x5A or tkn_type == 0x7A then 677 | match = str_i == str_arr.n + 1 or tkn_type == 0x24 and flags.multiline and is_newline(str_arr, str_i + 1, verb_flags) or tkn_type == 0x5A and str_i == str_arr.n and is_newline(str_arr, str_i, verb_flags); 678 | elseif tkn_type == 0x5E or tkn_type == 0x41 or tkn_type == 0x47 then 679 | match = str_i == 1 or tkn_type == 0x5E and flags.multiline and is_newline(str_arr, str_i - 1, verb_flags) or tkn_type == 0x47 and str_i == init; 680 | elseif tkn_type == 0x42 or tkn_type == 0x62 then 681 | local start_m = str_i == 1 or flags.multiline and is_newline(str_arr, str_i - 1, verb_flags); 682 | local end_m = str_i == str_arr.n + 1 or flags.multiline and is_newline(str_arr, str_i, verb_flags); 683 | local w_m = tkn_char_match(ctkn[2], str_arr[str_i - 1], flags) and 0 or tkn_char_match(ctkn[2], chr, flags) and 1; 684 | if w_m == 0 then 685 | match = end_m or not tkn_char_match(ctkn[2], chr, flags); 686 | elseif w_m then 687 | match = start_m or not tkn_char_match(ctkn[2], str_arr[str_i - 1], flags); 688 | end; 689 | if tkn_type == 0x42 then 690 | match = not match; 691 | end; 692 | else 693 | match = tkn_char_match(ctkn, str_arr, str_i, flags, verb_flags); 694 | str_i += 1; 695 | end; 696 | end; 697 | if not match then 698 | while true do 699 | local prev_type, prev_state = states[1] and states[1][1], states[1]; 700 | if not prev_type or prev_type == "PRUNE" or prev_type == "SKIP" then 701 | if prev_type then 702 | table.clear(states); 703 | end; 704 | if start_i > str_arr.n then 705 | 
if as_bool then 706 | return false; 707 | end; 708 | return nil; 709 | end; 710 | start_i = prev_type == "SKIP" and prev_state[2] or start_i + 1; 711 | tkn_i, str_i = 0, start_i; 712 | break; 713 | elseif prev_type == "alternation" then 714 | tkn_i, str_i = prev_state[2], prev_state[3]; 715 | local next_alt, count = find_alternation(token, tkn_i + 1); 716 | if next_alt then 717 | prev_state[2] = next_alt; 718 | else 719 | table.remove(states, 1); 720 | end; 721 | if count then 722 | str_i -= count; 723 | end; 724 | break; 725 | elseif prev_type == "group" then 726 | if prev_state[7] == "quantifier" then 727 | if prev_state[12] == "greedy" and prev_state[10] >= prev_state[8] 728 | or prev_state[12] == "lazy" and prev_state[10] < prev_state[9] and not prev_state[13] then 729 | tkn_i, str_i = prev_state[12] == "greedy" and prev_state[6] or prev_state[2], prev_state[3]; 730 | if prev_state[12] == "greedy" then 731 | table.remove(states, 1); 732 | break; 733 | elseif prev_state[10] >= prev_state[8] then 734 | prev_state[13] = true; 735 | break; 736 | end; 737 | end; 738 | elseif prev_state[7] == 0x21 then 739 | table.remove(states, 1); 740 | tkn_i, str_i = prev_state[6], prev_state[3]; 741 | break; 742 | end; 743 | elseif prev_type == "quantifier" then 744 | if math.sign(prev_state[4] - prev_state[3]) == math.sign(prev_state[5]) then 745 | prev_state[3] += prev_state[5]; 746 | tkn_i, str_i = prev_state[2], prev_state[3]; 747 | break; 748 | end; 749 | end; 750 | -- keep match out state and recursive state, can be safely removed 751 | -- prevents infinite loop 752 | table.remove(states, 1); 753 | end; 754 | end; 755 | tkn_i += 1; 756 | end; 757 | end; 758 | if as_bool then 759 | return true; 760 | end; 761 | local match_start_ran = false; 762 | local span = table.create(token.group_n); 763 | span[0], span.n = { start_i, str_i }, token.group_n; 764 | for _, v in ipairs(states) do 765 | if v[1] == "matchStart" and not match_start_ran then 766 | span[0][1], match_start_ran = 
v[2], true; 767 | elseif v[1] == "group" and v[5] and not span[v[5]] then 768 | span[v[5]] = { v[3], v[4] }; 769 | end; 770 | end; 771 | return span; 772 | end; 773 | 774 | --[[ Methods ]]-- 775 | re_m.test = check_re('RegEx', 'test', function(self, str, init) 776 | return re_rawfind(self.token, to_str_arr(str, init), 1, self.flags, self.verb_flags, true); 777 | end); 778 | 779 | re_m.match = check_re('RegEx', 'match', function(self, str, init, source) 780 | local span = re_rawfind(self.token, to_str_arr(str, init), 1, self.flags, self.verb_flags, false); 781 | if not span then 782 | return nil; 783 | end; 784 | return new_match(span, self.group_id, source, str); 785 | end); 786 | 787 | re_m.matchall = check_re('RegEx', 'matchall', function(self, str, init, source) 788 | str = to_str_arr(str, init); 789 | local i = 1; 790 | return function() 791 | local span = i <= str.n + 1 and re_rawfind(self.token, str, i, self.flags, self.verb_flags, false); 792 | if not span then 793 | return nil; 794 | end; 795 | i = span[0][2] + (span[0][1] >= span[0][2] and 1 or 0); 796 | return new_match(span, self.group_id, source, str.s); 797 | end; 798 | end); 799 | 800 | local function insert_tokenized_sub(repl_r, str, span, tkn) 801 | for _, v in ipairs(tkn) do 802 | if type(v) == "table" then 803 | if v[1] == "condition" then 804 | if span[v[2]] then 805 | if v[3] then 806 | insert_tokenized_sub(repl_r, str, span, v[3]); 807 | else 808 | table.move(str, span[v[2]][1], span[v[2]][2] - 1, #repl_r + 1, repl_r); 809 | end; 810 | elseif v[4] then 811 | insert_tokenized_sub(repl_r, str, span, v[4]); 812 | end; 813 | else 814 | table.move(v, 1, #v, #repl_r + 1, repl_r); 815 | end; 816 | elseif span[v] then 817 | table.move(str, span[v][1], span[v][2] - 1, #repl_r + 1, repl_r); 818 | end; 819 | end; 820 | repl_r.n = #repl_r; 821 | return repl_r; 822 | end; 823 | 824 | re_m.sub = check_re('RegEx', 'sub', function(self, repl, str, n, repl_flag_str, source) 825 | if repl_flag_str ~= nil and 
type(repl_flag_str) ~= "number" and type(repl_flag_str) ~= "string" then 826 | error(string.format("invalid argument #5 to 'sub' (string expected, got %s)", typeof(repl_flag_str)), 3); 827 | end 828 | local repl_flags = { 829 | l = false, o = false, u = false, 830 | }; 831 | for f in string.gmatch(repl_flag_str or '', utf8.charpattern) do 832 | if repl_flags[f] ~= false then 833 | error("invalid regular expression substitution flag " .. f, 3); 834 | end; 835 | repl_flags[f] = true; 836 | end; 837 | local repl_type = type(repl); 838 | if repl_type == "number" then 839 | repl ..= ''; 840 | elseif repl_type ~= "string" and repl_type ~= "function" and (not repl_flags.o or repl_type ~= "table") then 841 | error(string.format("invalid argument #2 to 'sub' (string/function%s expected, got %s)", repl_flags.o and "/table" or '', typeof(repl)), 3); 842 | end; 843 | if tonumber(n) then 844 | n = tonumber(n); 845 | if n <= -1 or n ~= n then 846 | n = math.huge; 847 | end; 848 | elseif n ~= nil then 849 | error(string.format("invalid argument #4 to 'sub' (number expected, got %s)", typeof(n)), 3); 850 | else 851 | n = math.huge; 852 | end; 853 | if n < 1 then 854 | return str, 0; 855 | end; 856 | local min_repl_n = 0; 857 | if repl_type == "string" then 858 | repl = to_str_arr(repl); 859 | if not repl_flags.l then 860 | local i1 = 0; 861 | local repl_r = table.create(3); 862 | local group_n = self.token.group_n; 863 | local conditional_c = { }; 864 | while i1 < repl.n do 865 | local i2 = i1; 866 | repeat 867 | i2 += 1; 868 | until not repl[i2] or repl[i2] == 0x24 or repl[i2] == 0x5C or (repl[i2] == 0x3A or repl[i2] == 0x7D) and conditional_c[1]; 869 | min_repl_n += i2 - i1 - 1; 870 | if i2 - i1 > 1 then 871 | table.insert(repl_r, table.move(repl, i1 + 1, i2 - 1, 1, table.create(i2 - i1 - 1))); 872 | end; 873 | if repl[i2] == 0x3A then 874 | local current_conditional_c = conditional_c[1]; 875 | if current_conditional_c[2] then 876 | error("malformed substitution pattern", 3); 
877 | end; 878 | current_conditional_c[2] = table.move(repl_r, current_conditional_c[3], #repl_r, 1, table.create(#repl_r + 1 - current_conditional_c[3])); 879 | for i3 = #repl_r, current_conditional_c[3], -1 do 880 | repl_r[i3] = nil; 881 | end; 882 | elseif repl[i2] == 0x7D then 883 | local current_conditional_c = table.remove(conditional_c, 1); 884 | local second_c = table.move(repl_r, current_conditional_c[3], #repl_r, 1, table.create(#repl_r + 1 - current_conditional_c[3])); 885 | for i3 = #repl_r, current_conditional_c[3], -1 do 886 | repl_r[i3] = nil; 887 | end; 888 | table.insert(repl_r, { "condition", current_conditional_c[1], current_conditional_c[2] ~= true and (current_conditional_c[2] or second_c), current_conditional_c[2] and second_c }); 889 | elseif repl[i2] then 890 | i2 += 1; 891 | local subst_c = repl[i2]; 892 | if not subst_c then 893 | if repl[i2 - 1] == 0x5C then 894 | error("replacement string must not end with a trailing backslash", 3); 895 | end; 896 | local prev_repl_f = repl_r[#repl_r]; 897 | if type(prev_repl_f) == "table" then 898 | table.insert(prev_repl_f, repl[i2 - 1]); 899 | else 900 | table.insert(repl_r, { repl[i2 - 1] }); 901 | end; 902 | elseif subst_c == 0x5C and repl[i2 - 1] == 0x24 then 903 | local prev_repl_f = repl_r[#repl_r]; 904 | if type(prev_repl_f) == "table" then 905 | table.insert(prev_repl_f, 0x24); 906 | else 907 | table.insert(repl_r, { 0x24 }); 908 | end; 909 | i2 -= 1; 910 | min_repl_n += 1; 911 | elseif subst_c == 0x30 then 912 | table.insert(repl_r, 0); 913 | elseif subst_c > 0x30 and subst_c <= 0x39 then 914 | local start_i2 = i2; 915 | local group_i = subst_c - 0x30; 916 | while repl[i2 + 1] and repl[i2 + 1] >= 0x30 and repl[i2 + 1] <= 0x39 do 917 | group_i ..= repl[i2 + 1] - 0x30; 918 | i2 += 1; 919 | end; 920 | group_i = tonumber(group_i); 921 | if not repl_flags.u and group_i > group_n then 922 | error("reference to non-existent subpattern", 3); 923 | end; 924 | table.insert(repl_r, group_i); 925 | elseif 
subst_c == 0x7B and repl[i2 - 1] == 0x24 then 926 | i2 += 1; 927 | local start_i2 = i2; 928 | while repl[i2] and 929 | (repl[i2] >= 0x30 and repl[i2] <= 0x39 930 | or repl[i2] >= 0x41 and repl[i2] <= 0x5A 931 | or repl[i2] >= 0x61 and repl[i2] <= 0x7A 932 | or repl[i2] == 0x5F) do 933 | i2 += 1; 934 | end; 935 | if (repl[i2] == 0x7D or repl[i2] == 0x3A and (repl[i2 + 1] == 0x2B or repl[i2 + 1] == 0x2D)) and i2 ~= start_i2 then 936 | local group_k = utf8_sub(repl.s, start_i2, i2); 937 | if repl[start_i2] >= 0x30 and repl[start_i2] <= 0x39 then 938 | group_k = tonumber(group_k); 939 | if not repl_flags.u and group_k > group_n then 940 | error("reference to non-existent subpattern", 3); 941 | end; 942 | else 943 | group_k = self.group_id[group_k]; 944 | if not repl_flags.u and (not group_k or group_k > group_n) then 945 | error("reference to non-existent subpattern", 3); 946 | end; 947 | end; 948 | if repl[i2] == 0x3A then 949 | i2 += 1; 950 | table.insert(conditional_c, { group_k, repl[i2] == 0x2D, #repl_r + 1 }); 951 | else 952 | table.insert(repl_r, group_k); 953 | end; 954 | else 955 | error("malformed substitution pattern", 3); 956 | end; 957 | else 958 | local c_escape_char; 959 | if repl[i2 - 1] == 0x24 then 960 | if subst_c ~= 0x24 then 961 | local prev_repl_f = repl_r[#repl_r]; 962 | if type(prev_repl_f) == "table" then 963 | table.insert(prev_repl_f, 0x24); 964 | else 965 | table.insert(repl_r, { 0x24 }); 966 | end; 967 | end; 968 | else 969 | c_escape_char = escape_chars[repl[i2]]; 970 | if type(c_escape_char) ~= "number" then 971 | c_escape_char = nil; 972 | end; 973 | end; 974 | local prev_repl_f = repl_r[#repl_r]; 975 | if type(prev_repl_f) == "table" then 976 | table.insert(prev_repl_f, c_escape_char or repl[i2]); 977 | else 978 | table.insert(repl_r, { c_escape_char or repl[i2] }); 979 | end; 980 | min_repl_n += 1; 981 | end; 982 | end; 983 | i1 = i2; 984 | end; 985 | if conditional_c[1] then 986 | error("malformed substitution pattern", 3); 987 | end; 
988 | if not repl_r[2] and type(repl_r[1]) == "table" and repl_r[1][1] ~= "condition" then 989 | repl, repl.n = repl_r[1], #repl_r[1]; 990 | else 991 | repl, repl_type = repl_r, "subst_string"; 992 | end; 993 | end; 994 | end; 995 | str = to_str_arr(str); 996 | local incr, i0, count = 0, 1, 0; 997 | while i0 <= str.n + incr + 1 do 998 | local span = re_rawfind(self.token, str, i0, self.flags, self.verb_flags, false); 999 | if not span then 1000 | break; 1001 | end; 1002 | local repl_r; 1003 | if repl_type == "string" then 1004 | repl_r = repl; 1005 | elseif repl_type == "subst_string" then 1006 | repl_r = insert_tokenized_sub(table.create(min_repl_n), str, span, repl); 1007 | else 1008 | local re_match; 1009 | local repl_c; 1010 | if repl_type == "table" then 1011 | re_match = utf8_sub(str.s, span[0][1], span[0][2]); 1012 | repl_c = repl[re_match]; 1013 | else 1014 | re_match = new_match(span, self.group_id, source, str.s); 1015 | repl_c = repl(re_match); 1016 | end; 1017 | if repl_c == re_match or repl_flags.o and not repl_c then 1018 | local repl_n = span[0][2] - span[0][1]; 1019 | repl_r = table.move(str, span[0][1], span[0][2] - 1, 1, table.create(repl_n)); 1020 | repl_r.n = repl_n; 1021 | elseif type(repl_c) == "string" then 1022 | repl_r = to_str_arr(repl_c); 1023 | elseif type(repl_c) == "number" then 1024 | repl_r = to_str_arr(repl_c .. 
'');
			elseif repl_flags.o then
				error(string.format("invalid replacement value (a %s)", type(repl_c)), 3);
			else
				repl_r = { n = 0 };
			end;
		end;
		local match_len = span[0][2] - span[0][1];
		local repl_len = math.min(repl_r.n, match_len);
		for i1 = 0, repl_len - 1 do
			str[span[0][1] + i1] = repl_r[i1 + 1];
		end;
		local i1 = span[0][1] + repl_len;
		i0 = span[0][2];
		if match_len > repl_r.n then
			for i2 = 1, match_len - repl_r.n do
				table.remove(str, i1);
				incr -= 1;
				i0 -= 1;
			end;
		elseif repl_r.n > match_len then
			for i2 = 1, repl_r.n - match_len do
				table.insert(str, i1 + i2 - 1, repl_r[repl_len + i2]);
				incr += 1;
				i0 += 1;
			end;
		end;
		if match_len <= 0 then
			i0 += 1;
		end;
		count += 1;
		if n < count + 1 then
			break;
		end;
	end;
	return from_str_arr(str), count;
end);

-- Splits `str` around every match of the pattern and returns the pieces as an array of strings.
-- `n` is an optional cap on how many matches are used as separators; nil, NaN and values
-- less than or equal to -1 all mean "no cap". Anything that is neither nil nor convertible
-- by tonumber raises an argument error.
re_m.split = check_re('RegEx', 'split', function(self, str, n)
	local limit = tonumber(n);
	if limit then
		if limit <= -1 or limit ~= limit then
			limit = math.huge;
		end;
	elseif n == nil then
		limit = math.huge;
	else
		error(string.format("invalid argument #3 to 'split' (number expected, got %s)", typeof(n)), 3);
	end;

	local subject = to_str_arr(str);
	local pieces = { };
	local cursor, attempts = 1, 0;
	-- 1 when the previous match was empty: the cursor was pushed one position past it,
	-- so the next piece has to start one position earlier than the cursor
	local empty_shift = 0;
	while cursor <= subject.n + 1 do
		attempts += 1;
		if not (limit >= attempts) then
			break;
		end;
		local span = re_rawfind(self.token, subject, cursor, self.flags, self.verb_flags, false);
		if not span then
			break;
		end;
		local match_start, match_stop = span[0][1], span[0][2];
		table.insert(pieces, utf8_sub(subject.s, cursor - empty_shift, match_start));
		if match_start >= match_stop then
			empty_shift = 1;
		else
			empty_shift = 0;
		end;
		cursor = match_stop + empty_shift;
	end;
	-- whatever follows the last separator becomes the final piece
	table.insert(pieces, string.sub(subject.s, utf8.offset(subject.s, cursor - empty_shift)));
	return pieces;
end);

--
-- Looks `index` up in the method table `re_m` first and falls back to the flag table
-- stored for this object in `proxy`.
local function re_index(self, index)
	return re_m[index] or proxy[self].flags[index];
end;

-- String conversion for a RegEx object: its pattern representation followed by its flags.
local function re_tostr(self)
	return proxy[self].pattern_repr .. proxy[self].flag_repr;
end;
--

local other_valid_group_char = {
	-- non-capturing group
	[0x3A] = true,
	-- lookarounds
	[0x21] = true, [0x3D] = true,
	-- atomic
	[0x3E] = true,
	-- branch reset
	[0x7C] = true,
};

-- Turns a pattern into the flat token list used by the matcher.
--
-- codes: code-point array of the pattern (`codes[i]` are code points, `codes.n` the length,
--        `codes.s` the original string)
-- flags: flag table of the expression being compiled
--
-- On success returns `outln, group_id, verb_flags`:
--   outln      token list; `outln.group_n` holds the highest capture group number
--   group_id   map of capture group name -> group number
--   verb_flags settings collected from leading `(*VERB)` items
-- On a syntax error returns a single string describing the problem (callers test the
-- type of the first return value).
local function tokenize_ptn(codes, flags)
	if flags.unicode and not options.unicodeData then
		return "options.unicodeData cannot be turned off while having unicode flag";
	end;
	local i, len = 1, codes.n;
	local group_n = 0;
	local outln, group_id, verb_flags = { }, { }, {
		newline = 1, newline_seq = 1, not_empty = 0,
	};
	while i <= len do
		local c = codes[i];
		if c == 0x28 then
			-- Match
			local ret;
			if codes[i + 1] == 0x2A then
				i += 2;
				local start_i = i;
				-- The verb name is a run of word characters optionally closed by one ':'.
				-- Scanning must stop right after that ':' or it would swallow the group's content.
				while codes[i]
					and (codes[i] >= 0x30 and codes[i] <= 0x39
					or codes[i] >= 0x41 and codes[i] <= 0x5A
					or codes[i] >= 0x61 and codes[i] <= 0x7A
					or codes[i] == 0x5F) do
					i += 1;
				end;
				if codes[i] == 0x3A then
					i += 1;
				end;
				if codes[i] ~= 0x29 and codes[i - 1] ~= 0x3A then
					-- fallback as normal and ( can't be repeated
					return "quantifier doesn't follow a repeatable pattern";
				end;
				local selected_verb = utf8_sub(codes.s, start_i, i);
				-- "negative_lookhead:" is a historical misspelling that is still accepted
				if selected_verb == "positive_lookahead:" or selected_verb == "negative_lookahead:" or selected_verb == "negative_lookhead:"
					or selected_verb == "positive_lookbehind:" or selected_verb == "negative_lookbehind:"
					or selected_verb:find("^[pn]l[ab]:$") then
					ret = { 0x28, nil, nil, selected_verb:find('^n') and 0x21 or 0x3D, selected_verb:find('b', 3, true) and 1 };
					-- i is already past the ':'; step back so the shared increment below lands on the group's first character
					i -= 1;
				elseif selected_verb == "atomic:" then
					ret = { 0x28, nil, nil, 0x3E, nil };
					i -= 1;
				elseif selected_verb == "ACCEPT" or selected_verb == "FAIL" or selected_verb == 'F' or selected_verb == "PRUNE" or selected_verb == "SKIP" then
					ret = selected_verb == 'F' and "FAIL" or selected_verb;
				else
					if line_verbs[selected_verb] then
						verb_flags.newline = selected_verb;
					elseif selected_verb == "BSR_ANYCRLF" or selected_verb == "BSR_UNICODE" then
						verb_flags.newline_seq = selected_verb == "BSR_UNICODE" and 1 or 0;
					elseif selected_verb == "NOTEMPTY" or selected_verb == "NOTEMPTY_ATSTART" then
						verb_flags.not_empty = selected_verb == "NOTEMPTY" and 1 or 2;
					else
						return "unknown or malformed verb";
					end;
					if outln[1] then
						return "this verb must be placed at the beginning of the regex";
					end;
				end;
			elseif codes[i + 1] == 0x3F then
				-- ? syntax
				i += 2;
				if codes[i] == 0x23 then
					-- comments
					i = table.find(codes, 0x29, i);
					if not i then
						return "unterminated parenthetical";
					end;
					i += 1;
					continue;
				elseif not codes[i] then
					return "unterminated parenthetical";
				end;
				ret = { 0x28, nil, nil, codes[i], nil };
				if codes[i] == 0x30 and codes[i + 1] == 0x29 then
					-- recursive match entire pattern
					ret[1], ret[2], ret[3], ret[5] = "recurmatch", 0, 0, nil;
					-- leave i on the ')' like the numbered form does, so it is not treated as a group close
					i += 1;
				elseif codes[i] > 0x30 and codes[i] <= 0x39 then
					-- recursive match
					local org_i = i;
					i += 1;
					while codes[i] and codes[i] >= 0x30 and codes[i] <= 0x39 do
						i += 1;
					end;
					if codes[i] ~= 0x29 then
						return "invalid group structure";
					end;
					ret[1], ret[2], ret[4] = "recurmatch", tonumber(utf8_sub(codes.s, org_i, i)), nil;
				elseif codes[i] == 0x3C and (codes[i + 1] == 0x21 or codes[i + 1] == 0x3D) then
					-- lookbehinds
					i += 1;
					ret[4], ret[5] = codes[i], 1;
				elseif codes[i] == 0x7C then
					-- branch reset
					ret[5] = group_n;
				elseif codes[i] == 0x50 or codes[i] == 0x3C or codes[i] == 0x27 then
					if codes[i] == 0x50 then
						i += 1;
					end;
					if codes[i] == 0x3D then
						-- backref
						local start_i = i + 1;
						-- the name starts after the '='
						i = start_i;
						while codes[i] and
							(codes[i] >= 0x30 and codes[i] <= 0x39
							or codes[i] >= 0x41 and codes[i] <= 0x5A
							or codes[i] >= 0x61 and codes[i] <= 0x7A
							or codes[i] == 0x5F) do
							i += 1;
						end;
						if not codes[i] then
							return "unterminated parenthetical";
						elseif codes[i] ~= 0x29 or i == start_i then
							return "invalid group structure";
						end;
						ret = { "backref", utf8_sub(codes.s, start_i, i) };
					elseif codes[i] == 0x3C or codes[i - 1] ~= 0x50 and codes[i] == 0x27 then
						-- named capture
						local delimiter = codes[i] == 0x27 and 0x27 or 0x3E;
						local start_i = i + 1;
						i += 1;
						if not codes[i] then
							return "unterminated parenthetical";
						elseif codes[i] == 0x29 then
							return "missing character in subpattern";
						elseif codes[i] >= 0x30 and codes[i] <= 0x39 then
							return "subpattern name must not begin with a digit";
						elseif not (codes[i] >= 0x41 and codes[i] <= 0x5A or codes[i] >= 0x61 and codes[i] <= 0x7A or codes[i] == 0x5F) then
							return "invalid character in subpattern";
						end;
						i += 1;
						while codes[i] and
							(codes[i] >= 0x30 and codes[i] <= 0x39
							or codes[i] >= 0x41 and codes[i] <= 0x5A
							or codes[i] >= 0x61 and codes[i] <= 0x7A
							or codes[i] == 0x5F) do
							i += 1;
						end;
						if not codes[i] then
							return "unterminated parenthetical";
						elseif codes[i] ~= delimiter then
							return "invalid character in subpattern";
						end;
						local name = utf8_sub(codes.s, start_i, i);
						group_n += 1;
						if (group_id[name] or group_n) ~= group_n then
							return "subpattern name already exists";
						end;
						for name1, group_n1 in pairs(group_id) do
							if name ~= name1 and group_n == group_n1 then
								return "different names for subpatterns of the same number aren't permitted";
							end;
						end;
						group_id[name] = group_n;
						ret[2], ret[4] = group_n, nil;
					else
						return "invalid group structure";
					end;
				elseif not other_valid_group_char[codes[i]] then
					return "invalid group structure";
				end;
			else
				group_n += 1;
				ret = { 0x28, group_n, nil, nil };
			end;
			if ret then
				table.insert(outln, ret);
			end;
		elseif c == 0x29 then
			-- Close parenthesis
			-- Walk back to the nearest still-open group, tracking per alternative the fixed
			-- width of the content (nil once it is not fixed) and the capture group counts.
			local i1 = #outln + 1;
			local lookbehind_c = -1;
			local current_lookbehind_c = 0;
			local max_c, group_c = 0, 0;
			repeat
				i1 -= 1;
				local v, is_table = outln[i1], type(outln[i1]) == "table";
				if is_table and v[1] == 0x28 then
					group_c += 1;
					if current_lookbehind_c and v.count then
						current_lookbehind_c += v.count;
					end;
					if not v[3] then
						if v[4] == 0x7C then
							group_n = v[5] + math.max(max_c, group_c);
						end;
						if current_lookbehind_c ~= lookbehind_c and lookbehind_c ~= -1 then
							lookbehind_c = nil;
						else
							lookbehind_c = current_lookbehind_c;
						end;
						break;
					end;
				elseif v == alternation then
					if current_lookbehind_c ~= lookbehind_c and lookbehind_c ~= -1 then
						lookbehind_c, current_lookbehind_c = nil, nil;
					else
						lookbehind_c, current_lookbehind_c = current_lookbehind_c, 0;
					end;
					max_c, group_c = math.max(max_c, group_c), 0;
				elseif current_lookbehind_c then
					if is_table and v[1] == "quantifier" then
						if v[2] == v[3] then
							current_lookbehind_c += v[2];
						else
							current_lookbehind_c = nil;
						end;
					else
						current_lookbehind_c += 1;
					end;
				end;
			until i1 < 1;
			if i1 < 1 then
				return "unmatched ) in regular expression";
			end;
			local v = outln[i1];
			local outln_len_p_1 = #outln + 1;
			local ret = { 0x29, v[2], i1, v[4], v[5], count = lookbehind_c };
			if (v[4] == 0x21 or v[4] == 0x3D) and v[5] and not lookbehind_c then
				return "lookbehind assertion is not fixed width";
			end;
			-- link the opening token to the position of its closing token
			v[3] = outln_len_p_1;
			table.insert(outln, ret);
		elseif c == 0x2E then
			table.insert(outln, dot);
		elseif c == 0x5B then
			-- Character set
			local negate, char_class = false, nil;
			i += 1;
			local start_i = i;
			if codes[i] == 0x5E then
				negate = true;
				i += 1;
			elseif codes[i] == 0x2E or codes[i] == 0x3A or codes[i] == 0x3D then
				-- POSIX character classes
				char_class = codes[i];
			end;
			local ret;
			if codes[i] == 0x5B or codes[i] == 0x5C then
				ret = { };
			else
				ret = { codes[i] };
				i += 1;
			end;
			while codes[i] ~= 0x5D do
				if not codes[i] then
					return "unterminated character class";
				elseif codes[i] == 0x2D and ret[1] and type(ret[1]) == "number" then
					if codes[i + 1] == 0x5D then
						table.insert(ret, 1, 0x2D);
					else
						i += 1;
						local ret_c = codes[i];
						if not ret_c then
							return "unterminated character class";
						end;
						if ret_c == 0x5B then
							if codes[i + 1] == 0x2E or codes[i + 1] == 0x3A or codes[i + 1] == 0x3D then
								-- Check for POSIX character class, name does not matter
								-- Every search starts one past the previous hit so that an escaped ']' cannot be found twice
								local i1 = i + 1;
								repeat
									i1 = table.find(codes, 0x5D, i1 + 1);
								until not i1 or codes[i1 - 1] ~= 0x5C;
								if not i1 then
									return "unterminated character class";
								elseif codes[i1 - 1] == codes[i + 1] and i1 - 1 ~= i + 1 then
									return "invalid range in character class";
								end;
							end;
							if ret[1] > 0x5B then
								return "invalid range in character class";
							end;
						elseif ret_c == 0x5C then
							i += 1;
							if not codes[i] then
								return "unterminated character class";
							end;
							if codes[i] == 0x78 then
								local radix0, radix1;
								i += 1;
								if codes[i] and (codes[i] >= 0x30 and codes[i] <= 0x39 or codes[i] >= 0x41 and codes[i] <= 0x46 or codes[i] >= 0x61 and codes[i] <= 0x66) then
									radix0 = codes[i] - ((codes[i] >= 0x41 and codes[i] <= 0x5A) and 0x37 or (codes[i] >= 0x61 and codes[i] <= 0x7A) and 0x57 or 0x30);
									i += 1;
									if codes[i] and (codes[i] >= 0x30 and codes[i] <= 0x39 or codes[i] >= 0x41 and codes[i] <= 0x46 or codes[i] >= 0x61 and codes[i] <= 0x66) then
										radix1 = codes[i] - ((codes[i] >= 0x41 and codes[i] <= 0x5A) and 0x37 or (codes[i] >= 0x61 and codes[i] <= 0x7A) and 0x57 or 0x30);
									else
										i -= 1;
									end;
								else
									i -= 1;
								end;
								ret_c = radix0 and (radix1 and 16 * radix0 + radix1 or radix0) or 0;
							elseif codes[i] >= 0x30 and codes[i] <= 0x37 then
								local radix0, radix1, radix2 = codes[i] - 0x30, nil, nil;
								i += 1;
								if codes[i] and codes[i] >= 0x30 and codes[i] <= 0x37 then
									radix1 = codes[i] - 0x30;
									i += 1;
									if codes[i] and codes[i] >= 0x30 and codes[i] <= 0x37 then
										radix2 = codes[i] - 0x30;
									else
										i -= 1;
									end;
								else
									i -= 1;
								end;
								ret_c = radix1 and (radix2 and 64 * radix0 + 8 * radix1 + radix2 or 8 * radix0 + radix1) or radix0;
							else
								ret_c = escape_chars[codes[i]] or codes[i];
								if type(ret_c) ~= "number" then
									return "invalid range in character class";
								end;
							end;
						elseif ret[1] > ret_c then
							return "invalid range in character class";
						end;
						ret[1] = { "range", ret[1], ret_c };
					end;
				elseif codes[i] == 0x5B then
					if codes[i + 1] == 0x2E or codes[i + 1] == 0x3A or codes[i + 1] == 0x3D then
						-- Every search starts one past the previous hit so that an escaped ']' cannot be found twice
						local i1 = i + 1;
						repeat
							i1 = table.find(codes, 0x5D, i1 + 1);
						until not i1 or codes[i1 - 1] ~= 0x5C;
						if not i1 then
							return "unterminated character class";
						elseif codes[i1 - 1] ~= codes[i + 1] or i1 - 1 == i + 1 then
							table.insert(ret, 1, 0x5B);
						elseif codes[i1 - 1] == 0x2E or codes[i1 - 1] == 0x3D then
							return "POSIX collating elements aren't supported";
						elseif codes[i1 - 1] == 0x3A then
							-- I have no plans to support escape codes (\) in character class names
							-- i is on '[' and i + 1 on ':', so an optional '^' sits at i + 2
							local class_negate = codes[i + 2] == 0x5E;
							local class_name = utf8_sub(codes.s, i + (class_negate and 3 or 2), i1 - 1);
							-- If not valid then throw an error
							if not posix_class_names[class_name] then
								return "unknown POSIX class name";
							end;
							table.insert(ret, 1, { "class", class_name, class_negate });
							i = i1;
						end;
					else
						table.insert(ret, 1, 0x5B);
					end;
				elseif codes[i] == 0x5C then
					i += 1;
					if not codes[i] then
						return "unterminated character class";
					end;
					if codes[i] == 0x78 then
						local radix0, radix1;
						i += 1;
						if codes[i] == 0x7B then
							i += 1;
							local org_i = i;
							while codes[i] and
								(codes[i] >= 0x30 and codes[i] <= 0x39
								or codes[i] >= 0x41 and codes[i] <= 0x46
								or codes[i] >= 0x61 and codes[i] <= 0x66) do
								i += 1;
							end;
							if codes[i] ~= 0x7D or i == org_i then
								return "malformed hexadecimal character";
							elseif i - org_i > 4 then
								return "character offset too large";
							end;
							table.insert(ret, 1, tonumber(utf8_sub(codes.s, org_i, i), 16));
						else
							if codes[i] and (codes[i] >= 0x30 and codes[i] <= 0x39 or codes[i] >= 0x41 and codes[i] <= 0x46 or codes[i] >= 0x61 and codes[i] <= 0x66) then
								radix0 = codes[i] - ((codes[i] >= 0x41 and codes[i] <= 0x5A) and 0x37 or (codes[i] >= 0x61 and codes[i] <= 0x7A) and 0x57 or 0x30);
								i += 1;
								if codes[i] and (codes[i] >= 0x30 and codes[i] <= 0x39 or codes[i] >= 0x41 and codes[i] <= 0x46 or codes[i] >= 0x61 and codes[i] <= 0x66) then
									radix1 = codes[i] - ((codes[i] >= 0x41 and codes[i] <= 0x5A) and 0x37 or (codes[i] >= 0x61 and codes[i] <= 0x7A) and 0x57 or 0x30);
								else
									i -= 1;
								end;
							else
								i -= 1;
							end;
							table.insert(ret, 1, radix0 and (radix1 and 16 * radix0 + radix1 or radix0) or 0);
						end;
					elseif codes[i] >= 0x30 and codes[i] <= 0x37 then
						local radix0, radix1, radix2 = codes[i] - 0x30, nil, nil;
						i += 1;
						if codes[i] and codes[i] >= 0x30 and codes[i] <= 0x37 then
							radix1 = codes[i] - 0x30;
							i += 1;
							if codes[i] and codes[i] >= 0x30 and codes[i] <= 0x37 then
								radix2 = codes[i] - 0x30;
							else
								i -= 1;
							end;
						else
							i -= 1;
						end;
						table.insert(ret, 1, radix1 and (radix2 and 64 * radix0 + 8 * radix1 + radix2 or 8 * radix0 + radix1) or radix0);
					elseif codes[i] == 0x45 then
						-- intentionally left blank, \E that's not preceded \Q is ignored
					elseif codes[i] == 0x51 then
						local start_i = i + 1;
						repeat
							i = table.find(codes, 0x5C, i + 1);
						until not i or codes[i + 1] == 0x45;
						if not i then
							-- the quote runs to the end of the pattern, so the set can never be closed
							return "unterminated character class";
						end;
						-- quoted characters are members of this set, not tokens of the surrounding pattern
						for quoted_i = start_i, i - 1 do
							table.insert(ret, 1, codes[quoted_i]);
						end;
						i += 1;
					elseif codes[i] == 0x4E then
						if codes[i + 1] == 0x7B and codes[i + 2] == 0x55 and codes[i + 3] == 0x2B and flags.unicode then
							i += 4;
							local start_i = i;
							while codes[i] and
								(codes[i] >= 0x30 and codes[i] <= 0x39
								or codes[i] >= 0x41 and codes[i] <= 0x46
								or codes[i] >= 0x61 and codes[i] <= 0x66) do
								i += 1;
							end;
							if codes[i] ~= 0x7D or i == start_i then
								return "malformed Unicode code point";
							end;
							-- the digits of U+hhhh are hexadecimal
							local code_point = tonumber(utf8_sub(codes.s, start_i, i), 16);
							table.insert(ret, 1, code_point);
						else
							return "invalid escape sequence";
						end;
					elseif codes[i] == 0x50 or codes[i] == 0x70 then
						if not options.unicodeData then
							return "options.unicodeData cannot be turned off when using \\p";
						end;
						-- remember whether this is \P before i moves off the escape letter
						local category_negate = codes[i] == 0x50;
						i += 1;
						if codes[i] ~= 0x7B then
							local c_name = utf8.char(codes[i] or 0);
							if not valid_categories[c_name] then
								return "unknown or malformed script name";
							end;
							table.insert(ret, 1, { "category", category_negate, c_name });
						else
							i += 1;
							if codes[i] == 0x5E then
								i += 1;
								category_negate = not category_negate;
							end;
							local start_i = i;
							while codes[i] and
								(codes[i] >= 0x30 and codes[i] <= 0x39
								or codes[i] >= 0x41 and codes[i] <= 0x5A
								or codes[i] >= 0x61 and codes[i] <= 0x7A
								or codes[i] == 0x5F) do
								i += 1;
							end;
							if codes[i] ~= 0x7D then
								return "unknown or malformed script name";
							end;
							local c_name = utf8_sub(codes.s, start_i, i);
							local script_set = chr_scripts[c_name];
							if script_set then
								table.insert(ret, 1, { "charset", category_negate, script_set });
							elseif not valid_categories[c_name] then
								return "unknown or malformed script name";
							else
								table.insert(ret, 1, { "category", category_negate, c_name });
							end;
						end;
					elseif codes[i] == 0x6F then
						i += 1;
						if codes[i] ~= 0x7B then
							return "malformed octal code";
						end;
						i += 1;
						local org_i = i;
						while codes[i] and codes[i] >= 0x30 and codes[i] <= 0x37 do
							i += 1;
						end;
						if codes[i] ~= 0x7D or i == org_i then
							return "malformed octal code";
						end;
						local ret_chr = tonumber(utf8_sub(codes.s, org_i, i), 8);
						if ret_chr > 0xFFFF then
							return "character offset too large";
						end;
						table.insert(ret, 1, ret_chr);
					else
						local esc_char = escape_chars[codes[i]];
						table.insert(ret, 1, type(esc_char) == "string" and { "class", esc_char, false } or esc_char or codes[i]);
					end;
				elseif flags.ignoreCase and codes[i] >= 0x61 and codes[i] <= 0x7A then
					table.insert(ret, 1, codes[i] - 0x20);
				else
					table.insert(ret, 1, codes[i]);
				end;
				i += 1;
			end;
			if codes[i - 1] == char_class and i - 1 ~= start_i then
				return char_class == 0x3A and "POSIX named classes are only support within a character set" or "POSIX collating elements aren't supported";
			end;
			if not ret[2] and not negate then
				-- a non-negated set with a single member is emitted as that member itself
				table.insert(outln, ret[1]);
			else
				table.insert(outln, { "charset", negate, ret });
			end;
		elseif c == 0x5C then
			-- Escape char
			i += 1;
			local escape_c = codes[i];
			if not escape_c then
				return "pattern may not end with a trailing backslash";
			elseif escape_c >= 0x30 and escape_c <= 0x39 then
				local org_i = i;
				while codes[i + 1] and codes[i + 1] >= 0x30 and codes[i + 1] <= 0x39 do
					i += 1;
				end;
				local escape_d = tonumber(utf8_sub(codes.s, org_i, i + 1));
				if escape_d > group_n and i ~= org_i then
					-- a multi-digit number larger than the groups seen so far is read as an octal code instead
					i = org_i;
					local radix0, radix1, radix2;
					if codes[i] <= 0x37 then
						radix0 = codes[i] - 0x30;
						i += 1;
						if codes[i] and codes[i] >= 0x30 and codes[i] <= 0x37 then
							radix1 = codes[i] - 0x30;
							i += 1;
							if codes[i] and codes[i] >= 0x30 and codes[i] <= 0x37 then
								radix2 = codes[i] - 0x30;
							else
								i -= 1;
							end;
						else
							i -= 1;
						end;
					end;
					table.insert(outln, radix0 and (radix1 and (radix2 and 64 * radix0 + 8 * radix1 + radix2 or 8 * radix0 + radix1) or radix0) or codes[org_i]);
				else
					table.insert(outln, { "backref", escape_d });
				end;
			elseif escape_c == 0x45 then
				-- intentionally left blank, \E that's not preceded \Q is ignored
			elseif escape_c == 0x51 then
				local start_i = i + 1;
				repeat
					i = table.find(codes, 0x5C, i + 1);
				until not i or codes[i + 1] == 0x45;
				table.move(codes, start_i, i and i - 1 or #codes, #outln + 1, outln);
				if not i then
					break;
				end;
				i += 1;
			elseif escape_c == 0x4E then
				if codes[i + 1] == 0x7B and codes[i + 2] == 0x55 and codes[i + 3] == 0x2B and flags.unicode then
					i += 4;
					local start_i = i;
					while codes[i] and
						(codes[i] >= 0x30 and codes[i] <= 0x39
						or codes[i] >= 0x41 and codes[i] <= 0x46
						or codes[i] >= 0x61 and codes[i] <= 0x66) do
						i += 1;
					end;
					if codes[i] ~= 0x7D or i == start_i then
						return "malformed Unicode code point";
					end;
					-- the digits of U+hhhh are hexadecimal
					local code_point = tonumber(utf8_sub(codes.s, start_i, i), 16);
					table.insert(outln, code_point);
				else
					table.insert(outln, escape_chars[0x4E]);
				end;
			elseif escape_c == 0x50 or escape_c == 0x70 then
				if not options.unicodeData then
					return "options.unicodeData cannot be turned off when using \\p";
				end;
				i += 1;
				if codes[i] ~= 0x7B then
					local c_name = utf8.char(codes[i] or 0);
					if not valid_categories[c_name] then
						return "unknown or malformed script name";
					end;
					-- the short form \PX negates just like \P{X}
					table.insert(outln, { "category", escape_c == 0x50, c_name });
				else
					local negate = escape_c == 0x50;
					i += 1;
					if codes[i] == 0x5E then
						i += 1;
						negate = not negate;
					end;
					local start_i = i;
					while codes[i] and
						(codes[i] >= 0x30 and codes[i] <= 0x39
						or codes[i] >= 0x41 and codes[i] <= 0x5A
						or codes[i] >= 0x61 and codes[i] <= 0x7A
						or codes[i] == 0x5F) do
						i += 1;
					end;
					if codes[i] ~= 0x7D then
						return "unknown or malformed script name";
					end;
					local c_name = utf8_sub(codes.s, start_i, i);
					local script_set = chr_scripts[c_name];
					if script_set then
						table.insert(outln, { "charset", negate, script_set });
					elseif not valid_categories[c_name] then
						return "unknown or malformed script name";
					else
						table.insert(outln, { "category", negate, c_name });
					end;
				end;
			elseif escape_c == 0x67 and (codes[i + 1] == 0x7B or codes[i + 1] and codes[i + 1] >= 0x30 and codes[i + 1] <= 0x39) then
				local is_grouped = false;
				i += 1;
				if codes[i] == 0x7B then
					i += 1;
					is_grouped = true;
				elseif codes[i] < 0x30 or codes[i] > 0x39 then
					return "malformed reference code";
				end;
				local org_i = i;
				-- a numeric reference consists of decimal digits only
				while codes[i] and codes[i] >= 0x30 and codes[i] <= 0x39 do
					i += 1;
				end;
				if is_grouped and codes[i] ~= 0x7D then
					return "malformed reference code";
				end;
				-- i is one past the last digit in both forms, which is the exclusive end utf8_sub expects
				local ref_name = tonumber(utf8_sub(codes.s, org_i, i));
				table.insert(outln, { "backref", ref_name });
				if not is_grouped then
					-- no closing brace to consume: step back onto the last digit
					i -= 1;
				end;
			elseif escape_c == 0x6F then
				i += 1;
				if codes[i] ~= 0x7B then
					return "malformed octal code";
				end
				i += 1;
				local org_i = i;
				while codes[i] and codes[i] >= 0x30 and codes[i] <= 0x37 do
					i += 1;
				end;
				if codes[i] ~= 0x7D or i == org_i then
					return "malformed octal code";
				end;
				local ret_chr = tonumber(utf8_sub(codes.s, org_i, i), 8);
				if ret_chr > 0xFFFF then
					return "character offset too large";
				end;
				table.insert(outln, ret_chr);
			elseif escape_c == 0x78 then
				local radix0, radix1;
				i += 1;
				if codes[i] == 0x7B then
					i += 1;
					local org_i = i;
					while codes[i] and
						(codes[i] >= 0x30 and codes[i] <= 0x39
						or codes[i] >= 0x41 and codes[i] <= 0x46
						or codes[i] >= 0x61 and codes[i] <= 0x66) do
						i += 1;
					end;
					if codes[i] ~= 0x7D or i == org_i then
						return "malformed hexadecimal code";
					elseif i - org_i > 4 then
						return "character offset too large";
					end;
					table.insert(outln, tonumber(utf8_sub(codes.s, org_i, i), 16));
				else
					if codes[i] and (codes[i] >= 0x30 and codes[i] <= 0x39 or codes[i] >= 0x41 and codes[i] <= 0x46 or codes[i] >= 0x61 and codes[i] <= 0x66) then
						radix0 = codes[i] - ((codes[i] >= 0x41 and codes[i] <= 0x5A) and 0x37 or (codes[i] >= 0x61 and codes[i] <= 0x7A) and 0x57 or 0x30);
						i += 1;
						if codes[i] and (codes[i] >= 0x30 and codes[i] <= 0x39 or codes[i] >= 0x41 and codes[i] <= 0x46 or codes[i] >= 0x61 and codes[i] <= 0x66) then
							radix1 = codes[i] - ((codes[i] >= 0x41 and codes[i] <= 0x5A) and 0x37 or (codes[i] >= 0x61 and codes[i] <= 0x7A) and 0x57 or 0x30);
						else
							i -= 1;
						end;
					else
						i -= 1;
					end;
					table.insert(outln, radix0 and (radix1 and 16 * radix0 + radix1 or radix0) or 0);
				end;
			else
				local esc_char = b_escape_chars[escape_c] or escape_chars[escape_c];
				table.insert(outln, esc_char or escape_c);
			end;
		elseif c == 0x2A or c == 0x2B or c == 0x3F or c == 0x7B then
			-- Quantifier
			local start_q, end_q;
			if c == 0x7B then
				local org_i = i + 1;
				local start_i;
				while codes[i + 1] and (codes[i + 1] >= 0x30 and codes[i + 1] <= 0x39 or codes[i + 1] == 0x2C and not start_i and i + 1 ~= org_i) do
					i += 1;
					if codes[i] == 0x2C then
						start_i = i;
					end;
				end;
				if codes[i + 1] == 0x7D then
					i += 1;
					if not start_i then
						start_q = tonumber(utf8_sub(codes.s, org_i, i));
						end_q = start_q;
					else
						start_q, end_q = tonumber(utf8_sub(codes.s, org_i, start_i)), start_i + 1 == i and math.huge or tonumber(utf8_sub(codes.s, start_i + 1, i));
						if end_q < start_q then
							return "numbers out of order in {} quantifier";
						end;
					end;
				else
					-- not a well-formed {m,n}: keep the scanned characters as literals
					table.move(codes, org_i - 1, i, #outln + 1, outln);
				end;
			else
				start_q, end_q = c == 0x2B and 1 or 0, c == 0x3F and 1 or math.huge;
			end;
			if start_q then
				local quantifier_type = flags.ungreedy and "lazy" or "greedy";
				if codes[i + 1] == 0x2B or codes[i + 1] == 0x3F then
					i += 1;
					quantifier_type = codes[i] == 0x2B and "possessive" or flags.ungreedy and "greedy" or "lazy";
				end;
				local outln_len = #outln;
				local last_outln_value = outln[outln_len];
				if not last_outln_value or type(last_outln_value) == "table" and (last_outln_value[1] == "quantifier" or last_outln_value[1] == 0x28 or b_escape_chars[last_outln_value[1]])
					or last_outln_value == alternation or type(last_outln_value) == "string" then
					return "quantifier doesn't follow a repeatable pattern";
				end;
				if end_q == 0 then
					table.remove(outln);
				elseif start_q ~= 1 or end_q ~= 1 then
					if type(last_outln_value) == "table" and last_outln_value[1] == 0x29 then
						-- a quantified group wraps the token at the group's opening position
						outln_len = last_outln_value[3];
					end;
					outln[outln_len] = { "quantifier", start_q, end_q, quantifier_type, outln[outln_len] };
				end;
			end;
		elseif c == 0x7C then
			-- Alternation
			table.insert(outln, alternation);
			-- inside a branch-reset group every alternative restarts group numbering
			local i1 = #outln;
			repeat
				i1 -= 1;
				local v1, is_table = outln[i1], type(outln[i1]) == "table";
				if is_table and v1[1] == 0x29 then
					i1 = outln[i1][3];
				elseif is_table and v1[1] == 0x28 then
					if v1[4] == 0x7C then
						group_n = v1[5];
					end;
					break;
				end;
			until not v1;
		elseif c == 0x24 or c == 0x5E then
			table.insert(outln, c == 0x5E and beginning_str or end_str);
		elseif flags.ignoreCase and c >= 0x61 and c <= 0x7A then
			table.insert(outln, c - 0x20);
		elseif flags.extended and (c >= 0x09 and c <= 0x0D or c == 0x20 or c == 0x23) then
			if c == 0x23 then
				repeat
					i += 1;
				until not codes[i] or codes[i] == 0x0A or codes[i] == 0x0D;
			end;
		else
			table.insert(outln, c);
		end;
		i += 1;
	end;
	-- Final pass: every group must be closed and every reference must point at a known group.
	local max_group_n = 0;
	for _, v in ipairs(outln) do
		if type(v) == "table" and (v[1] == 0x28 or v[1] == "quantifier" and type(v[5]) == "table" and v[5][1] == 0x28) then
			if v[1] == "quantifier" then
				v = v[5];
			end;
			if not v[3] then
				return "unterminated parenthetical";
			elseif v[2] then
				max_group_n = math.max(max_group_n, v[2]);
			end;
		elseif type(v) == "table" and (v[1] == "backref" or v[1] == "recurmatch") then
			if not group_id[v[2]] and (type(v[2]) ~= "number" or v[2] > group_n) then
				return "reference to a non-existent or invalid subpattern";
			elseif v[1] == "recurmatch" and v[2] ~= 0 then
				for i1, v1 in ipairs(outln) do
					if type(v1) == "table" and v1[1] == 0x28 and v1[2] == v[2] then
						v[3] = i1;
						break;
					end;
				end;
			elseif type(v[2]) == "string" then
				v[2] = group_id[v[2]];
			end;
		end;
	end;
	outln.group_n = max_group_n;
	return outln, group_id, verb_flags;
end;

if not tonumber(options.cacheSize) then
	error(string.format("expected number for options.cacheSize, got %s", typeof(options.cacheSize)), 2);
end;
-- false when the floored size is 0 (caching disabled), otherwise the numeric cache size
local cacheSize = math.floor(options.cacheSize or 0) ~= 0 and tonumber(options.cacheSize);
local cache_pattern, cache_pattern_names;
if not cacheSize then
elseif cacheSize < 0 or cacheSize ~= cacheSize then
	error("cache size cannot be a negative number or a NaN", 2);
elseif cacheSize == math.huge then
	cache_pattern, cache_pattern_names = { nil }, { nil };
elseif cacheSize >= 2 ^ 32 then
	error("cache size too large", 2);
else
	cache_pattern, cache_pattern_names = table.create(options.cacheSize), table.create(options.cacheSize);
end;
if cacheSize then
	-- Empties the tokenized-pattern cache.
	function re.pruge()
		table.clear(cache_pattern_names);
		table.clear(cache_pattern);
	end;
	-- correctly spelled alias; `pruge` is kept for existing callers
	re.purge = re.pruge;
end;

local function new_re(str_arr, flags, flag_repr, pattern_repr)
	local
tokenized_ptn, group_id, verb_flags; 1910 | local cache_format = cacheSize and string.format("%s|%s", str_arr.s, flag_repr); 1911 | local cached_token = cacheSize and cache_pattern[table.find(cache_pattern_names, cache_format)]; 1912 | if cached_token then 1913 | tokenized_ptn, group_id, verb_flags = table.unpack(cached_token, 1, 3); 1914 | else 1915 | tokenized_ptn, group_id, verb_flags = tokenize_ptn(str_arr, flags); 1916 | if type(tokenized_ptn) == "string" then 1917 | error(tokenized_ptn, 2); 1918 | end; 1919 | if cacheSize and tokenized_ptn[1] then 1920 | table.insert(cache_pattern_names, 1, cache_format); 1921 | table.insert(cache_pattern, 1, { tokenized_ptn, group_id, verb_flags }); 1922 | if cacheSize ~= math.huge then 1923 | table.remove(cache_pattern_names, cacheSize + 1); 1924 | table.remove(cache_pattern, cacheSize + 1); 1925 | end; 1926 | end; 1927 | end; 1928 | 1929 | local object = newproxy(true); 1930 | proxy[object] = { name = "RegEx", flags = flags, flag_repr = flag_repr, pattern_repr = pattern_repr, token = tokenized_ptn, group_id = group_id, verb_flags = verb_flags }; 1931 | local object_mt = getmetatable(object); 1932 | object_mt.__index = setmetatable(flags, re_m); 1933 | object_mt.__tostring = re_tostr; 1934 | object_mt.__metatable = lockmsg; 1935 | 1936 | return object; 1937 | end; 1938 | 1939 | local function escape_fslash(pre) 1940 | return (#pre % 2 == 0 and '\\' or '') .. pre .. '.'; 1941 | end; 1942 | 1943 | local function sort_flag_chr(a, b) 1944 | return a:lower() < b:lower(); 1945 | end; 1946 | 1947 | function re.new(...) 1948 | if select('#', ...) 
-- re.new(pattern [, flags])
-- Compiles `pattern` (string or number) with the optional flag characters in
-- `flags` (string, number or nil) and returns the object built by new_re.
-- Raises on a missing/invalid argument and on unknown or repeated flags.
function re.new(...)
	if select('#', ...) == 0 then
		error("missing argument #1 (string expected)", 2);
	end;

	local ptn, flags_str = ...;

	local ptn_type = type(ptn);
	if ptn_type == "number" then
		ptn = ptn .. '';
	elseif ptn_type ~= "string" then
		error(string.format("invalid argument #1 (string expected, got %s)", typeof(ptn)), 2);
	end;

	local flags_type = type(flags_str);
	if flags_str ~= nil and flags_type ~= "string" and flags_type ~= "number" then
		error(string.format("invalid argument #2 (string expected, got %s)", typeof(flags_str)), 2);
	end;

	-- Every known flag starts out as false; anything that is not exactly
	-- false when looked up is either unknown (nil) or already set (true).
	local flags = {
		anchored = false,
		caseless = false,
		multiline = false,
		dotall = false,
		unicode = false,
		ungreedy = false,
		extended = false,
	};
	local flag_chars = { };
	for chr in string.gmatch(flags_str or '', utf8.charpattern) do
		local flag_name = flag_map[chr];
		if flags[flag_name] ~= false then
			error("invalid regular expression flag " .. chr, 3);
		end;
		flags[flag_name] = true;
		table.insert(flag_chars, chr);
	end;
	table.sort(flag_chars, sort_flag_chr);
	local flag_repr = table.concat(flag_chars);

	-- Only the substituted string is needed; gsub's match count is discarded.
	local escaped_ptn = ptn:gsub("(\\*)/", escape_fslash);
	local pattern_repr = string.format("/%s/", escaped_ptn);

	return new_re(to_str_arr(ptn), flags, flag_repr, pattern_repr);
end;
-- re.fromstring(source)
-- Parses a delimited regex literal such as "/abc/i": the first character is the
-- delimiter, the pattern runs up to the next unescaped occurrence of it, and
-- everything after that is read as flag characters.
-- Raises on a missing/invalid argument, an empty string, an alphanumeric or
-- backslash delimiter, a missing closing delimiter, and unknown or repeated flags.
function re.fromstring(...)
	if select('#', ...) == 0 then
		error("missing argument #1 (string expected)", 2);
	end;
	local ptn = ...;
	if type(ptn) == "number" then
		ptn = ptn .. '';
	elseif type(ptn) ~= "string" then
		-- The level argument belongs to error(), not to string.format().
		error(string.format("invalid argument #1 (string expected, got %s)", typeof(ptn)), 2);
	end;
	local str_arr = to_str_arr(ptn);
	local delimiter = str_arr[1];
	if not delimiter then
		error("empty regex", 2);
	elseif delimiter == 0x5C or (delimiter >= 0x30 and delimiter <= 0x39) or (delimiter >= 0x41 and delimiter <= 0x5A) or (delimiter >= 0x61 and delimiter <= 0x7A) then
		error("delimiter must not be alphanumeric or a backslash", 2);
	end;

	-- Find the closing delimiter: the next occurrence preceded by an even
	-- number of backslashes (escape_count is that number plus one).
	local i0 = 1;
	repeat
		i0 = table.find(str_arr, delimiter, i0 + 1);
		if not i0 then
			error(string.format("no ending delimiter ('%s') found", utf8.char(delimiter)), 2);
		end;
		local escape_count = 1;
		while str_arr[i0 - escape_count] == 0x5C do
			escape_count = escape_count + 1;
		end;
	until escape_count % 2 == 1;

	local flags = {
		anchored = false, caseless = false, multiline = false, dotall = false, unicode = false, ungreedy = false, extended = false,
	};
	local flag_repr = { };
	-- Number of bytes taken up by the trailing flag characters.
	local flags_byte_len = 0;
	while str_arr.n > i0 do
		-- Flags are popped from the end, so they are collected in reverse; they are sorted below.
		local f = utf8.char(table.remove(str_arr));
		str_arr.n = str_arr.n - 1;
		if flags[flag_map[f]] ~= false then
			error("invalid regular expression flag " .. f, 3);
		end;
		flags[flag_map[f]] = true;
		flags_byte_len = flags_byte_len + #f;
		table.insert(flag_repr, f);
	end;
	table.sort(flag_repr, sort_flag_chr);
	flag_repr = table.concat(flag_repr);

	-- Drop the opening and closing delimiters from the array.
	table.remove(str_arr, 1);
	table.remove(str_arr);
	str_arr.n = str_arr.n - 2;

	-- Slice by byte length rather than by element count, so multi-byte
	-- characters are not cut short.
	-- NOTE(review): assumes to_str_arr yields the code points of `ptn` and keeps
	-- the original text in `.s` (consistent with the utf8.char calls above) - confirm.
	local delimiter_byte_len = #utf8.char(delimiter);
	local source = str_arr.s;
	str_arr.s = string.sub(source, delimiter_byte_len + 1, #source - flags_byte_len - delimiter_byte_len);
	return new_re(str_arr, flags, flag_repr, string.sub(ptn, 1, #ptn - flags_byte_len));
end;
-- Textual escapes used for control characters that should not appear raw in a pattern.
local re_escape_line_chrs = {
	['\0'] = '\\x00', ['\n'] = '\\n', ['\t'] = '\\t', ['\r'] = '\\r', ['\f'] = '\\f',
};

-- re.escape(str [, extended [, delimiter]])
-- Returns `str` with regex metacharacters backslash-escaped so that it can be
-- embedded in a pattern as literal text.
--   str       : string or number to escape
--   extended  : when truthy, whitespace is escaped as well
--   delimiter : optional single character that is escaped too; it must not be
--               alphanumeric or a backslash
-- NUL, form feed, newline, carriage return and tab are always written as
-- \x00, \f, \n, \r and \t.
function re.escape(...)
	if select('#', ...) == 0 then
		error("missing argument #1 (string expected)", 2);
	end;
	local str, extended, delimiter = ...;
	if type(str) == "number" then
		str = str .. '';
	elseif type(str) ~= "string" then
		error(string.format("invalid argument #1 to 'escape' (string expected, got %s)", typeof(str)), 2);
	end;
	if delimiter == nil then
		delimiter = '';
	elseif type(delimiter) == "number" then
		delimiter = delimiter .. '';
	elseif type(delimiter) ~= "string" then
		error(string.format("invalid argument #3 to 'escape' (string expected, got %s)", typeof(delimiter)), 2);
	end;
	-- utf8.len returns nil for invalid UTF-8; treat that as an invalid delimiter
	-- instead of failing on the comparison. %w (not %a) so digits are rejected too.
	local delimiter_len = utf8.len(delimiter);
	if not delimiter_len or delimiter_len > 1 or delimiter:match("^[%w\\]$") then
		error("delimiter have not be alphanumeric", 2);
	end;

	-- '%' and ']' need a '%' prefix to be literal inside a Lua character set.
	local delimiter_class = (delimiter:find("^[%%%]]$") and '%' or '') .. delimiter;
	local escape_set = "[%z\f\n\r\t\\#$()%%*+.?[%]^{|" .. (extended and '%s' or '') .. delimiter_class .. "]";

	-- Single pass: doing the control-character replacement first and the
	-- backslash escaping second would escape the backslash that the first
	-- step just produced (a newline would come out as "\\n").
	return (string.gsub(str, escape_set, function(chr)
		return re_escape_line_chrs[chr] or ('\\' .. chr);
	end));
end;
-- re.type(value)
-- Returns the `name` recorded in `proxy` for `value`, or a falsy value when
-- `value` has no entry there. Raises when called without arguments.
function re.type(...)
	if select('#', ...) == 0 then
		error("missing argument #1", 2);
	end;
	local entry = proxy[(...)];
	return entry and entry.name;
end;

-- Expose every method in re_m as a function on the library table as well.
for method_name, method in pairs(re_m) do
	re[method_name] = method;
end;

-- From here on re_m is used as a metatable whose __index is the method table.
re_m = { __index = re_m };

-- The value reported by getmetatable() for protected objects is itself a RegEx.
lockmsg = re.fromstring([[/The\s*metatable\s*is\s*(?:locked|inaccessible)(?#Nice try :])/i]]);
getmetatable(lockmsg).__metatable = lockmsg;

-- __newindex handler shared by the read-only tables below.
local function readonly_table()
	error("Attempt to modify a readonly table", 2);
end;

match_m = {
	__index = match_m,
	__metatable = lockmsg,
	__newindex = readonly_table,
};

re.Match = setmetatable({ }, match_m);

-- The module value is an empty, read-only proxy in front of `re`.
local library_mt = {
	__index = re,
	__metatable = lockmsg,
	__newindex = readonly_table,
};
return setmetatable({ }, library_mt);