├── .clang-format ├── .github └── workflows │ └── test.yml ├── .gitmodules ├── .lua-format ├── .luacheckrc ├── README.md ├── rockspecs ├── pcre2-0.1.0-1.rockspec ├── pcre2-0.1.1-1.rockspec ├── pcre2-0.1.2-1.rockspec ├── pcre2-0.2.0-1.rockspec └── pcre2-scm-1.rockspec ├── src └── pcre2.c └── test └── pcre2_test.lua /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | BasedOnStyle: LLVM 4 | AlignAfterOpenBracket: Align 5 | AlignArrayOfStructures: Left 6 | AlignConsecutiveAssignments: AcrossComments 7 | AlignConsecutiveBitFields: AcrossComments 8 | AlignConsecutiveDeclarations: None 9 | AlignConsecutiveMacros: AcrossComments 10 | AlignEscapedNewlines: Right 11 | AlignOperands: Align 12 | AlignTrailingComments: true 13 | AllowAllArgumentsOnNextLine: true 14 | AllowAllParametersOfDeclarationOnNextLine: false 15 | AllowShortBlocksOnASingleLine: Empty 16 | AllowShortCaseLabelsOnASingleLine: false 17 | AllowShortEnumsOnASingleLine: false 18 | AllowShortFunctionsOnASingleLine: Empty 19 | AllowShortIfStatementsOnASingleLine: Never 20 | AllowShortLoopsOnASingleLine: false 21 | AlwaysBreakAfterReturnType: None 22 | AlwaysBreakBeforeMultilineStrings: false 23 | AlwaysBreakTemplateDeclarations: No 24 | BinPackArguments: true 25 | BinPackParameters: true 26 | BitFieldColonSpacing: None 27 | BraceWrapping: 28 | AfterCaseLabel: false 29 | AfterControlStatement: Never 30 | AfterEnum: false 31 | AfterFunction: false 32 | AfterStruct: false 33 | AfterUnion: false 34 | AfterExternBlock: false 35 | BeforeCatch: false 36 | BeforeElse: false 37 | BeforeWhile: false 38 | IndentBraces: false 39 | SplitEmptyFunction: true 40 | SplitEmptyRecord: true 41 | BreakBeforeBinaryOperators: None 42 | BreakBeforeBraces: Linux 43 | BreakBeforeTernaryOperators: false 44 | BreakStringLiterals: true 45 | ColumnLimit: 80 46 | CommentPragmas: '^ IWYU pragma:' 47 | ContinuationIndentWidth: 4 48 | DeriveLineEnding: true 49 | 
DerivePointerAlignment: true 50 | DisableFormat: false 51 | ExperimentalAutoDetectBinPacking: false 52 | FixNamespaceComments: true 53 | ForEachMacros: 54 | - foreach 55 | - Q_FOREACH 56 | - BOOST_FOREACH 57 | IncludeBlocks: Preserve 58 | IncludeCategories: 59 | - Regex: '^"(llvm|llvm-c|clang|clang-c)/' 60 | Priority: 2 61 | SortPriority: 0 62 | - Regex: '^(<|"(gtest|gmock|isl|json)/)' 63 | Priority: 3 64 | SortPriority: 0 65 | - Regex: '.*' 66 | Priority: 1 67 | SortPriority: 0 68 | IncludeIsMainRegex: '(Test)?$' 69 | IncludeIsMainSourceRegex: '' 70 | IndentCaseBlocks: false 71 | IndentCaseLabels: false 72 | IndentExternBlock: NoIndent 73 | IndentGotoLabels: false 74 | IndentPPDirectives: AfterHash 75 | IndentWidth: 4 76 | IndentWrappedFunctionNames: false 77 | InsertTrailingCommas: None 78 | KeepEmptyLinesAtTheStartOfBlocks: false 79 | MacroBlockBegin: '' 80 | MacroBlockEnd: '' 81 | MaxEmptyLinesToKeep: 1 82 | NamespaceIndentation: None 83 | PPIndentWidth: 1 84 | PenaltyBreakAssignment: 2 85 | PenaltyBreakBeforeFirstCallParameter: 19 86 | PenaltyBreakComment: 300 87 | PenaltyBreakFirstLessLess: 120 88 | PenaltyBreakString: 1000 89 | PenaltyBreakTemplateDeclaration: 10 90 | PenaltyExcessCharacter: 1000000 91 | PenaltyReturnTypeOnItsOwnLine: 60 92 | PointerAlignment: Right 93 | # QualifierAlignment Leave 94 | # QualifierOrder [] 95 | ReflowComments: true 96 | SortIncludes: CaseSensitive 97 | SpaceAfterCStyleCast: false 98 | SpaceAfterLogicalNot: false 99 | SpaceBeforeAssignmentOperators: true 100 | SpaceBeforeCaseColon: false 101 | SpaceBeforeParens: ControlStatements 102 | SpaceBeforeRangeBasedForLoopColon: false 103 | SpaceBeforeSquareBrackets: false 104 | SpaceInEmptyBlock: false 105 | SpaceInEmptyParentheses: false 106 | SpacesBeforeTrailingComments: 1 107 | SpacesInCStyleCastParentheses: false 108 | SpacesInConditionalStatement: false 109 | SpacesInContainerLiterals: false 110 | SpacesInParentheses: false 111 | SpacesInSquareBrackets: false 112 | Standard: 
Latest 113 | StatementMacros: 114 | - Q_UNUSED 115 | - QT_REQUIRE_VERSION 116 | TabWidth: 8 117 | UseCRLF: false 118 | UseTab: Never 119 | WhitespaceSensitiveMacros: 120 | - STRINGIZE 121 | - PP_STRINGIZE 122 | - BOOST_PP_STRINGIZE 123 | ... 124 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | 3 | on: [push] 4 | 5 | jobs: 6 | luacheck: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - 10 | name: Checkout 11 | uses: actions/checkout@v2 12 | with: 13 | submodules: 'true' 14 | - 15 | name: Setup Lua 16 | uses: leafo/gh-actions-lua@v8.0.0 17 | - 18 | name: Setup Luarocks 19 | uses: leafo/gh-actions-luarocks@v4.0.0 20 | - 21 | name: Install Tools 22 | run: luarocks install luacheck 23 | - 24 | name: Run luacheck 25 | run: | 26 | luacheck . 27 | 28 | test: 29 | runs-on: ubuntu-latest 30 | strategy: 31 | matrix: 32 | lua-version: 33 | - "5.1" 34 | - "5.2" 35 | - "5.3" 36 | - "5.4" 37 | 38 | steps: 39 | - 40 | name: Checkout 41 | uses: actions/checkout@v2 42 | with: 43 | submodules: 'true' 44 | - 45 | name: Setup Lua ${{ matrix.lua-version }} 46 | uses: leafo/gh-actions-lua@v8.0.0 47 | with: 48 | luaVersion: ${{ matrix.lua-version }} 49 | - 50 | name: Setup Luarocks 51 | uses: leafo/gh-actions-luarocks@v4.0.0 52 | - 53 | name: Install Tools 54 | run: | 55 | sudo apt-get install libpcre2-dev -y 56 | luarocks install dump 57 | - 58 | name: Install 59 | run: | 60 | luarocks make 61 | - 62 | name: Run Test 63 | run: | 64 | lua ./test/pcre2_test.lua 65 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "deps/lauxhlib"] 2 | path = deps/lauxhlib 3 | url = https://github.com/mah0x211/lauxhlib.git 4 | -------------------------------------------------------------------------------- 
/.lua-format: -------------------------------------------------------------------------------- 1 | break_after_table_lb: true 2 | break_before_table_rb: false 3 | break_before_functioncall_rp: true 4 | break_before_functiondef_rp: true 5 | chop_down_table: true 6 | extra_sep_at_table_end: true 7 | keep_simple_control_block_one_line: false 8 | keep_simple_function_one_line: false 9 | column_table_limit: 1 10 | -------------------------------------------------------------------------------- /.luacheckrc: -------------------------------------------------------------------------------- 1 | std = 'max' 2 | include_files = { 3 | 'test/*_test.lua', 4 | } 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # lua-pcre2 2 | 3 | [![test](https://github.com/mah0x211/lua-pcre2/actions/workflows/test.yml/badge.svg)](https://github.com/mah0x211/lua-pcre2/actions/workflows/test.yml) 4 | 5 | 6 | PCRE2 bindings for lua. 7 | 8 | **NOTE:** this module is under heavy development. 9 | 10 | 11 | ## Dependencies 12 | 13 | - lauxhlib: 14 | 15 | --- 16 | 17 | ## pcre2 module 18 | 19 | ```lua 20 | local pcre2 = require('pcre2') 21 | ``` 22 | 23 | 24 | ## Constants 25 | 26 | 27 | ### Compile options 28 | 29 | - `ANCHORED`: Force pattern anchoring. 30 | - `ALT_BSUX`: Alternative handling of `\u`, `\U`, and `\x`. 31 | - `ALT_CIRCUMFLEX`: Alternative handling of `^` in multiline mode. 32 | - `AUTO_CALLOUT`: Compile automatic callouts. 33 | - `CASELESS`: Do caseless matching. 34 | - `DOLLAR_ENDONLY`: `$` not to match newline at end. 35 | - `DOTALL`: `.` matches anything including NL. 36 | - `DUPNAMES`: Allow duplicate names for subpatterns. 37 | - `EXTENDED`: Ignore white space and `#` comments. 38 | - `FIRSTLINE`: Force matching to be before newline. 39 | - `MATCH_UNSET_BACKREF`: Match unset back references. 40 | - `MULTILINE`: `^` and `$` match newlines within data. 
41 | - `NEVER_BACKSLASH_C`: Lock out the use of `\C` in patterns. 42 | - `NEVER_UCP`: Lock out `UCP` option, e.g. via (*UCP) 43 | - `NEVER_UTF`: Lock out `UTF` option, e.g. via (*UTF) 44 | - `NO_AUTO_CAPTURE`: Disable numbered capturing parentheses. (named ones available) 45 | - `NO_AUTO_POSSESS`: Disable auto-possessification. 46 | - `NO_DOTSTAR_ANCHOR`: Disable automatic anchoring for `.*`. 47 | - `NO_START_OPTIMIZE`: Disable match-time start optimizations. 48 | - `NO_UTF_CHECK`: Do not check the pattern for `UTF` validity. (only relevant if `UTF` option is set) 49 | - `UCP`: Use Unicode properties for `\d`, `\w`, etc. 50 | - `UNGREEDY`: Invert greediness of quantifiers. 51 | - `UTF`: Treat pattern and subjects as UTF strings 52 | 53 | 54 | ### JIT Compile options 55 | 56 | - `JIT_COMPLETE`: compile code for full matching. 57 | - `JIT_PARTIAL_SOFT`: compile code for soft partial matching. 58 | - `JIT_PARTIAL_HARD`: compile code for hard partial matching. 59 | 60 | 61 | ### Match options 62 | 63 | - `ANCHORED`: Match only at the first position. 64 | - `NOTBOL`: Subject string is not the beginning of a line. 65 | - `NOTEOL`: Subject string is not the end of a line. 66 | - `NOTEMPTY`: An empty string is not a valid match. 67 | - `NOTEMPTY_ATSTART`: An empty string at the start of the subject is not a valid match. 68 | - `NO_UTF_CHECK`: Do not check the subject for UTF validity (only relevant if `UTF` option was set at compile time) 69 | - `PARTIAL_SOFT`: Return `PCRE2_ERROR_PARTIAL` for a partial match if no full matches are found. 70 | - `PARTIAL_HARD`: Return `PCRE2_ERROR_PARTIAL` for a partial match if that is found before a full match. 71 | 72 | For details of partial matching, see the `pcre2partial` page. 73 | 74 | 75 | ## Creating a PCRE2 object 76 | 77 | ### re, err = pcre2.new( pattern [, opt, ...] ) 78 | 79 | creates a new PCRE2 object. 80 | 81 | **Params** 82 | 83 | - `pattern:string`: string containing expression to be compiled. 
84 | - `opt, ...:number`: [Compile options](#compile-options). 85 | 86 | **Returns** 87 | 88 | - `re:pcre2`: PCRE2 object. 89 | - `err:string`: error message. 90 | 91 | 92 | ## Methods 93 | 94 | ### ok, err = re:jit_compile( [opt, ...] ) 95 | 96 | This function requests JIT compilation, which, if the just-in-time compiler is available, further processes a compiled pattern into machine code that executes much faster than the pcre2_match() interpretive matching function. Full details are given in the `pcre2jit` documentation. 97 | 98 | **Params** 99 | 100 | - `opt, ...:number`: [JIT Compile options](#jit-compile-options). 101 | 102 | **Returns** 103 | 104 | - `ok:boolean`: true on success. 105 | - `err:string`: error message. 106 | 107 | 108 | ### head, tail, err = re:match( sbj [, offset [, opt, ...]] ) 109 | 110 | matches a compiled regular expression against a given subject string, using a matching algorithm that is similar to Perl's. It returns offsets to captured substrings. 111 | 112 | **Params** 113 | 114 | - `sbj:string`: the subject string. 115 | - `offset:number`: 0-based offset in the subject at which to start matching. (default: `0`) 116 | - `opt, ...:number`: [Match options](#match-options). 117 | 118 | **Returns** 119 | 120 | - `head:table`: array of start offsets. (1-based) 121 | - `tail:table`: array of end offsets. (1-based, inclusive) 122 | - `err:string`: error message. 123 | 124 | 125 | ### head, tail, err = re:match_nocap( sbj [, offset [, opt, ...]] ) 126 | 127 | almost the same as the `match` method, but it returns only the offsets of the whole matched string. 128 | 129 | **Params** 130 | 131 | - `sbj:string`: the subject string. 132 | - `offset:number`: 0-based offset in the subject at which to start matching. (default: `0`) 133 | - `opt, ...:number`: [Match options](#match-options). 134 | 135 | **Returns** 136 | 137 | - `head:number`: start offset. (1-based) 138 | - `tail:number`: end offset. (1-based, inclusive) 139 | - `err:string`: error message. 
140 | 141 | 142 | ## Example 143 | 144 | ```lua 145 | local pcre2 = require('pcre2') 146 | 147 | local re = assert( pcre2.new('(\\d+)(\\w)') ) 148 | 149 | assert( re:jit_compile( pcre2.JIT_COMPLETE ) ) 150 | 151 | local sbj = 'abc081abc134klj567' 152 | local head, tail, err = re:match( sbj ) 153 | 154 | while head do 155 | print( 'match' ) 156 | for i = 1, #head do 157 | print( i, sbj:sub( head[i], tail[i] ) ) 158 | end 159 | 160 | head, tail, err = re:match( sbj, tail[1] ) 161 | end 162 | 163 | if err then 164 | error( err ) 165 | end 166 | 167 | print( 'done' ) 168 | 169 | --[[ 170 | this script will be output the following strings; 171 | 172 | match 173 | 1 081a 174 | 2 081 175 | 3 a 176 | match 177 | 1 134k 178 | 2 134 179 | 3 k 180 | match 181 | 1 567 182 | 2 56 183 | 3 7 184 | done 185 | ]] 186 | ``` 187 | -------------------------------------------------------------------------------- /rockspecs/pcre2-0.1.0-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "pcre2" 2 | version = "0.1.0-1" 3 | source = { 4 | url = "gitrec://github.com/mah0x211/lua-pcre2.git", 5 | tag = "v0.1.0" 6 | } 7 | description = { 8 | summary = "PCRE2 bindings for lua", 9 | homepage = "https://github.com/mah0x211/lua-pcre2", 10 | license = "MIT/X11", 11 | maintainer = "Masatoshi Teruya" 12 | } 13 | dependencies = { 14 | "lua >= 5.1", 15 | "luarocks-fetch-gitrec >= 0.2" 16 | } 17 | build = { 18 | type = "command", 19 | build_command = [[ 20 | CFLAGS="$(CFLAGS)" sh build_deps.sh && autoreconf -ivf && CFLAGS="$(CFLAGS)" CPPFLAGS="-I$(LUA_INCDIR)" LIBFLAG="$(LIBFLAG)" OBJ_EXTENSION="$(OBJ_EXTENSION)" LIB_EXTENSION="$(LIB_EXTENSION)" LIBDIR="$(LIBDIR)" CONFDIR="$(CONFDIR)" ./configure && make clean && make 21 | ]], 22 | install_command = [[ 23 | make install 24 | ]] 25 | } 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /rockspecs/pcre2-0.1.1-1.rockspec: 
-------------------------------------------------------------------------------- 1 | package = "pcre2" 2 | version = "0.1.1-1" 3 | source = { 4 | url = "gitrec://github.com/mah0x211/lua-pcre2.git", 5 | tag = "v0.1.1" 6 | } 7 | description = { 8 | summary = "PCRE2 bindings for lua", 9 | homepage = "https://github.com/mah0x211/lua-pcre2", 10 | license = "MIT/X11", 11 | maintainer = "Masatoshi Teruya" 12 | } 13 | dependencies = { 14 | "lua >= 5.1", 15 | "luarocks-fetch-gitrec >= 0.2" 16 | } 17 | build = { 18 | type = "command", 19 | build_command = [[ 20 | CFLAGS="$(CFLAGS)" sh build_deps.sh && autoreconf -ivf && CFLAGS="$(CFLAGS)" CPPFLAGS="-I$(LUA_INCDIR)" LIBFLAG="$(LIBFLAG)" OBJ_EXTENSION="$(OBJ_EXTENSION)" LIB_EXTENSION="$(LIB_EXTENSION)" LIBDIR="$(LIBDIR)" CONFDIR="$(CONFDIR)" ./configure && make clean && make 21 | ]], 22 | install_command = [[ 23 | make install 24 | ]] 25 | } 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /rockspecs/pcre2-0.1.2-1.rockspec: -------------------------------------------------------------------------------- 1 | rockspec_format = "3.0" 2 | package = "pcre2" 3 | version = "0.1.2-1" 4 | source = { 5 | url = "git+https://github.com/mah0x211/lua-pcre2.git", 6 | tag = "v0.1.2" 7 | } 8 | description = { 9 | summary = "PCRE2 bindings for lua", 10 | homepage = "https://github.com/mah0x211/lua-pcre2", 11 | license = "MIT/X11", 12 | maintainer = "Masatoshi Fukunaga" 13 | } 14 | dependencies = { 15 | "lua >= 5.1", 16 | } 17 | build = { 18 | type = "command", 19 | build_command = [[ 20 | CFLAGS="$(CFLAGS)" sh build_deps.sh && autoreconf -ivf && CFLAGS="$(CFLAGS)" CPPFLAGS="-I$(LUA_INCDIR)" LIBFLAG="$(LIBFLAG)" OBJ_EXTENSION="$(OBJ_EXTENSION)" LIB_EXTENSION="$(LIB_EXTENSION)" LIBDIR="$(LIBDIR)" CONFDIR="$(CONFDIR)" ./configure && make clean && make 21 | ]], 22 | install_command = [[ 23 | make install 24 | ]] 25 | } 26 | 
-------------------------------------------------------------------------------- /rockspecs/pcre2-0.2.0-1.rockspec: -------------------------------------------------------------------------------- 1 | rockspec_format = "3.0" 2 | package = "pcre2" 3 | version = "0.2.0-1" 4 | source = { 5 | url = "git+https://github.com/mah0x211/lua-pcre2.git", 6 | tag = "v0.2.0", 7 | } 8 | description = { 9 | summary = "PCRE2 bindings for lua", 10 | homepage = "https://github.com/mah0x211/lua-pcre2", 11 | license = "MIT/X11", 12 | maintainer = "Masatoshi Fukunaga" 13 | } 14 | dependencies = { 15 | "lua >= 5.1", 16 | } 17 | external_dependencies = { 18 | LIBPCRE2 = { 19 | header = "pcre2.h", 20 | library = "pcre2-8", 21 | } 22 | } 23 | build = { 24 | type = "builtin", 25 | modules = { 26 | ["pcre2"] = { 27 | sources = { "src/pcre2.c" }, 28 | libraries = { "pcre2-8" }, 29 | incdirs = { 30 | "deps/lauxhlib", 31 | "$(LIBPCRE2_INCDIR)" 32 | }, 33 | libdirs = { 34 | "$(LIBPCRE2_LIBDIR)" 35 | } 36 | }, 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /rockspecs/pcre2-scm-1.rockspec: -------------------------------------------------------------------------------- 1 | rockspec_format = "3.0" 2 | package = "pcre2" 3 | version = "scm-1" 4 | source = { 5 | url = "git+https://github.com/mah0x211/lua-pcre2.git" 6 | } 7 | description = { 8 | summary = "PCRE2 bindings for lua", 9 | homepage = "https://github.com/mah0x211/lua-pcre2", 10 | license = "MIT/X11", 11 | maintainer = "Masatoshi Fukunaga" 12 | } 13 | dependencies = { 14 | "lua >= 5.1", 15 | } 16 | external_dependencies = { 17 | LIBPCRE2 = { 18 | header = "pcre2.h", 19 | library = "pcre2-8", 20 | } 21 | } 22 | build = { 23 | type = "builtin", 24 | modules = { 25 | ["pcre2"] = { 26 | sources = { "src/pcre2.c" }, 27 | libraries = { "pcre2-8" }, 28 | incdirs = { 29 | "deps/lauxhlib", 30 | "$(LIBPCRE2_INCDIR)" 31 | }, 32 | libdirs = { 33 | "$(LIBPCRE2_LIBDIR)" 34 | } 35 | }, 36 | } 37 | } 38 | 
-------------------------------------------------------------------------------- /src/pcre2.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2017 Masatoshi Teruya 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to 6 | * deal in the Software without restriction, including without limitation the 7 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | * sell copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | * IN THE SOFTWARE. 21 | * 22 | * pcre2.c 23 | * lua-pcre2 24 | * 25 | * Created by Masatoshi Teruya on 17/05/29. 
26 | */ 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #define PCRE2_CODE_UNIT_WIDTH 8 36 | #include "pcre2.h" 37 | 38 | // MARK: lua binding 39 | #define MODULE_MT "pcre2" 40 | 41 | #define REGEX_MODE_JIT 0x1 42 | 43 | typedef struct { 44 | uint32_t mode; 45 | pcre2_code *code; 46 | } lpcre2_t; 47 | 48 | typedef struct { 49 | int len; 50 | const char msg[256]; 51 | } lpcre2_error_t; 52 | 53 | static inline void lpcre2_strerror(lpcre2_error_t *err, int errnum) 54 | { 55 | err->len = pcre2_get_error_message(errnum, (PCRE2_UCHAR *)err->msg, 56 | sizeof(err->msg)); 57 | } 58 | 59 | static inline int lpcre2_match_lua(lua_State *L, int nocap) 60 | { 61 | lpcre2_t *p = lauxh_checkudata(L, 1, MODULE_MT); 62 | size_t len = 0; 63 | PCRE2_SPTR sbj = (PCRE2_SPTR)lauxh_checklstring(L, 2, &len); 64 | lua_Integer offset = lauxh_optinteger(L, 3, 0); 65 | uint32_t opts = lauxh_optflags(L, 4); 66 | pcre2_match_data *data = 67 | pcre2_match_data_create_from_pattern(p->code, NULL); 68 | 69 | if (data) { 70 | int rc = 0; 71 | 72 | if (p->mode & REGEX_MODE_JIT) { 73 | rc = pcre2_jit_match(p->code, sbj, len, offset, opts, data, NULL); 74 | } else { 75 | rc = pcre2_match(p->code, sbj, len, offset, opts, data, NULL); 76 | } 77 | 78 | if (rc > 0) { 79 | PCRE2_SIZE *ovec = pcre2_get_ovector_pointer(data); 80 | 81 | // push only offsets of matched strings 82 | if (nocap) { 83 | lua_pushinteger(L, ovec[0] + 1); 84 | lua_pushinteger(L, ovec[1]); 85 | } else { 86 | int i = 0; 87 | 88 | // create head and tail arrays 89 | lua_createtable(L, rc, 0); 90 | lua_createtable(L, rc, 0); 91 | for (; i < rc; i++) { 92 | lua_pushinteger(L, ovec[i * 2] + 1); 93 | lua_rawseti(L, -3, i + 1); 94 | lua_pushinteger(L, ovec[i * 2 + 1]); 95 | lua_rawseti(L, -2, i + 1); 96 | } 97 | } 98 | rc = 2; 99 | } 100 | // got error 101 | else { 102 | lpcre2_error_t err; 103 | 104 | lua_pushnil(L); 105 | lua_pushnil(L); 106 | switch (rc) { 107 | case 
PCRE2_ERROR_NOMATCH: 108 | rc = 2; 109 | break; 110 | 111 | default: 112 | lpcre2_strerror(&err, rc); 113 | lua_pushlstring(L, err.msg, err.len); 114 | rc = 3; 115 | break; 116 | } 117 | } 118 | 119 | pcre2_match_data_free(data); 120 | 121 | return rc; 122 | } 123 | 124 | // got mem error 125 | lua_pushnil(L); 126 | lua_pushstring(L, strerror(errno)); 127 | 128 | return 2; 129 | } 130 | 131 | static int match_nocap_lua(lua_State *L) 132 | { 133 | return lpcre2_match_lua(L, 1); 134 | } 135 | 136 | static int match_lua(lua_State *L) 137 | { 138 | return lpcre2_match_lua(L, 0); 139 | } 140 | 141 | static int jit_compile_lua(lua_State *L) 142 | { 143 | lpcre2_t *p = lauxh_checkudata(L, 1, MODULE_MT); 144 | uint32_t opts = lauxh_optflags(L, 2); 145 | 146 | // jit-compile if opts specified 147 | if (opts) { 148 | int rc = pcre2_jit_compile(p->code, opts); 149 | 150 | if (rc) { 151 | lpcre2_error_t err; 152 | 153 | lpcre2_strerror(&err, rc); 154 | lua_pushboolean(L, 0); 155 | lua_pushfstring(L, "PCRE2 JIT compilation failed: %s", err.msg); 156 | return 2; 157 | } 158 | 159 | p->mode |= REGEX_MODE_JIT; 160 | } 161 | 162 | lua_pushboolean(L, 1); 163 | 164 | return 1; 165 | } 166 | 167 | static int gc_lua(lua_State *L) 168 | { 169 | lpcre2_t *p = lua_touserdata(L, 1); 170 | 171 | pcre2_code_free(p->code); 172 | 173 | return 0; 174 | } 175 | 176 | static int tostring_lua(lua_State *L) 177 | { 178 | lua_pushfstring(L, MODULE_MT ": %p", lua_touserdata(L, 1)); 179 | return 1; 180 | } 181 | 182 | static int new_lua(lua_State *L) 183 | { 184 | size_t len = 0; 185 | const char *pattern = lauxh_checklstring(L, 1, &len); 186 | uint32_t opts = lauxh_optflags(L, 2); 187 | lpcre2_t *p = lua_newuserdata(L, sizeof(lpcre2_t)); 188 | int rc = 0; 189 | PCRE2_SIZE offset = 0; 190 | 191 | // compile pattern 192 | if (!(p->code = pcre2_compile((PCRE2_SPTR)pattern, len, opts, &rc, &offset, 193 | NULL))) { 194 | lpcre2_error_t err; 195 | 196 | lpcre2_strerror(&err, rc); 197 | lua_pushnil(L); 198 
| lua_pushfstring(L, "PCRE2 compilation failed at offset %d: %s", 199 | (int)offset, err.msg); 200 | return 2; 201 | } 202 | 203 | p->mode = 0; 204 | lauxh_setmetatable(L, MODULE_MT); 205 | 206 | return 1; 207 | } 208 | // module entry point: registers the MODULE_MT metatable and returns the module table 209 | LUALIB_API int luaopen_pcre2(lua_State *L) 210 | { 211 | struct luaL_Reg mmethod[] = { 212 | {"__gc", gc_lua }, 213 | {"__tostring", tostring_lua}, 214 | {NULL, NULL } 215 | }; 216 | struct luaL_Reg method[] = { 217 | {"jit_compile", jit_compile_lua}, 218 | {"match", match_lua }, 219 | {"match_nocap", match_nocap_lua}, 220 | {NULL, NULL } 221 | }; 222 | struct luaL_Reg *ptr = mmethod; 223 | 224 | // create metatable: metamethods first, then the methods table stored under __index 225 | luaL_newmetatable(L, MODULE_MT); 226 | while (ptr->name) { 227 | lauxh_pushfn2tbl(L, ptr->name, ptr->func); 228 | ptr++; 229 | } 230 | lua_pushstring(L, "__index"); 231 | lua_newtable(L); 232 | ptr = method; 233 | while (ptr->name) { 234 | lauxh_pushfn2tbl(L, ptr->name, ptr->func); 235 | ptr++; 236 | } 237 | lua_rawset(L, -3); 238 | lua_pop(L, 1); 239 | // module table: the new() constructor plus the PCRE2 constants registered below 240 | lua_newtable(L); 241 | lauxh_pushfn2tbl(L, "new", new_lua); 242 | 243 | // flags for pcre2_compile(), pcre2_match(), or pcre2_dfa_match 244 | lauxh_pushint2tbl(L, "ANCHORED", PCRE2_ANCHORED); 245 | lauxh_pushint2tbl(L, "NO_UTF_CHECK", PCRE2_NO_UTF_CHECK); 246 | 247 | // flags for pcre2_compile() 248 | lauxh_pushint2tbl(L, "ALLOW_EMPTY_CLASS", PCRE2_ALLOW_EMPTY_CLASS); 249 | lauxh_pushint2tbl(L, "ALT_BSUX", PCRE2_ALT_BSUX); 250 | lauxh_pushint2tbl(L, "AUTO_CALLOUT", PCRE2_AUTO_CALLOUT); 251 | lauxh_pushint2tbl(L, "CASELESS", PCRE2_CASELESS); 252 | lauxh_pushint2tbl(L, "DOLLAR_ENDONLY", PCRE2_DOLLAR_ENDONLY); 253 | lauxh_pushint2tbl(L, "DOTALL", PCRE2_DOTALL); 254 | lauxh_pushint2tbl(L, "DUPNAMES", PCRE2_DUPNAMES); 255 | lauxh_pushint2tbl(L, "EXTENDED", PCRE2_EXTENDED); 256 | lauxh_pushint2tbl(L, "FIRSTLINE", PCRE2_FIRSTLINE); 257 | lauxh_pushint2tbl(L, "MATCH_UNSET_BACKREF", PCRE2_MATCH_UNSET_BACKREF); 258 | lauxh_pushint2tbl(L, "MULTILINE", PCRE2_MULTILINE); 259 | 
lauxh_pushint2tbl(L, "NEVER_UCP", PCRE2_NEVER_UCP); 260 | lauxh_pushint2tbl(L, "NEVER_UTF", PCRE2_NEVER_UTF); 261 | lauxh_pushint2tbl(L, "NO_AUTO_CAPTURE", PCRE2_NO_AUTO_CAPTURE); 262 | lauxh_pushint2tbl(L, "NO_AUTO_POSSESS", PCRE2_NO_AUTO_POSSESS); 263 | lauxh_pushint2tbl(L, "NO_DOTSTAR_ANCHOR", PCRE2_NO_DOTSTAR_ANCHOR); 264 | lauxh_pushint2tbl(L, "NO_START_OPTIMIZE", PCRE2_NO_START_OPTIMIZE); 265 | lauxh_pushint2tbl(L, "UCP", PCRE2_UCP); 266 | lauxh_pushint2tbl(L, "UNGREEDY", PCRE2_UNGREEDY); 267 | lauxh_pushint2tbl(L, "UTF", PCRE2_UTF); 268 | lauxh_pushint2tbl(L, "NEVER_BACKSLASH_C", PCRE2_NEVER_BACKSLASH_C); 269 | lauxh_pushint2tbl(L, "ALT_CIRCUMFLEX", PCRE2_ALT_CIRCUMFLEX); 270 | lauxh_pushint2tbl(L, "ALT_VERBNAMES", PCRE2_ALT_VERBNAMES); 271 | lauxh_pushint2tbl(L, "USE_OFFSET_LIMIT", PCRE2_USE_OFFSET_LIMIT); 272 | 273 | // flags for pcre2_jit_compile() 274 | lauxh_pushint2tbl(L, "JIT_COMPLETE", PCRE2_JIT_COMPLETE); 275 | lauxh_pushint2tbl(L, "JIT_PARTIAL_SOFT", PCRE2_JIT_PARTIAL_SOFT); 276 | lauxh_pushint2tbl(L, "JIT_PARTIAL_HARD", PCRE2_JIT_PARTIAL_HARD); 277 | 278 | // flags for pcre2_match(), pcre2_dfa_match(), and pcre2_jit_match() 279 | lauxh_pushint2tbl(L, "NOTBOL", PCRE2_NOTBOL); 280 | lauxh_pushint2tbl(L, "NOTEOL", PCRE2_NOTEOL); 281 | lauxh_pushint2tbl(L, "NOTEMPTY", PCRE2_NOTEMPTY); 282 | lauxh_pushint2tbl(L, "NOTEMPTY_ATSTART", PCRE2_NOTEMPTY_ATSTART); 283 | lauxh_pushint2tbl(L, "PARTIAL_SOFT", PCRE2_PARTIAL_SOFT); 284 | lauxh_pushint2tbl(L, "PARTIAL_HARD", PCRE2_PARTIAL_HARD); 285 | 286 | // flags for pcre2_dfa_match() 287 | lauxh_pushint2tbl(L, "DFA_RESTART", PCRE2_DFA_RESTART); 288 | lauxh_pushint2tbl(L, "DFA_SHORTEST", PCRE2_DFA_SHORTEST); 289 | 290 | // flags for pcre2_match() 291 | // not allowed for pcre2_dfa_match() 292 | // ignored for pcre2_jit_match() 293 | lauxh_pushint2tbl(L, "NO_JIT", PCRE2_NO_JIT); 294 | 295 | // Request types for pcre2_pattern_info() 296 | lauxh_pushint2tbl(L, "INFO_ALLOPTIONS", PCRE2_INFO_ALLOPTIONS); 297 | 
lauxh_pushint2tbl(L, "INFO_ARGOPTIONS", PCRE2_INFO_ARGOPTIONS); 298 | lauxh_pushint2tbl(L, "INFO_BACKREFMAX", PCRE2_INFO_BACKREFMAX); 299 | lauxh_pushint2tbl(L, "INFO_BSR", PCRE2_INFO_BSR); 300 | lauxh_pushint2tbl(L, "INFO_CAPTURECOUNT", PCRE2_INFO_CAPTURECOUNT); 301 | lauxh_pushint2tbl(L, "INFO_FIRSTCODEUNIT", PCRE2_INFO_FIRSTCODEUNIT); 302 | lauxh_pushint2tbl(L, "INFO_FIRSTCODETYPE", PCRE2_INFO_FIRSTCODETYPE); 303 | lauxh_pushint2tbl(L, "INFO_FIRSTBITMAP", PCRE2_INFO_FIRSTBITMAP); 304 | lauxh_pushint2tbl(L, "INFO_HASCRORLF", PCRE2_INFO_HASCRORLF); 305 | lauxh_pushint2tbl(L, "INFO_JCHANGED", PCRE2_INFO_JCHANGED); 306 | lauxh_pushint2tbl(L, "INFO_JITSIZE", PCRE2_INFO_JITSIZE); 307 | lauxh_pushint2tbl(L, "INFO_LASTCODEUNIT", PCRE2_INFO_LASTCODEUNIT); 308 | lauxh_pushint2tbl(L, "INFO_LASTCODETYPE", PCRE2_INFO_LASTCODETYPE); 309 | lauxh_pushint2tbl(L, "INFO_MATCHEMPTY", PCRE2_INFO_MATCHEMPTY); 310 | lauxh_pushint2tbl(L, "INFO_MATCHLIMIT", PCRE2_INFO_MATCHLIMIT); 311 | lauxh_pushint2tbl(L, "INFO_MAXLOOKBEHIND", PCRE2_INFO_MAXLOOKBEHIND); 312 | lauxh_pushint2tbl(L, "INFO_MINLENGTH", PCRE2_INFO_MINLENGTH); 313 | lauxh_pushint2tbl(L, "INFO_NAMECOUNT", PCRE2_INFO_NAMECOUNT); 314 | lauxh_pushint2tbl(L, "INFO_NAMEENTRYSIZE", PCRE2_INFO_NAMEENTRYSIZE); 315 | lauxh_pushint2tbl(L, "INFO_NAMETABLE", PCRE2_INFO_NAMETABLE); 316 | lauxh_pushint2tbl(L, "INFO_NEWLINE", PCRE2_INFO_NEWLINE); 317 | lauxh_pushint2tbl(L, "INFO_RECURSIONLIMIT", PCRE2_INFO_RECURSIONLIMIT); 318 | lauxh_pushint2tbl(L, "INFO_SIZE", PCRE2_INFO_SIZE); 319 | lauxh_pushint2tbl(L, "INFO_HASBACKSLASHC", PCRE2_INFO_HASBACKSLASHC); 320 | 321 | return 1; 322 | } 323 | -------------------------------------------------------------------------------- /test/pcre2_test.lua: -------------------------------------------------------------------------------- 1 | local assert = assert 2 | local pcre2 = require('pcre2') 3 | local dump = require('dump') 4 | 5 | local function test_jit_compile() 6 | local re = 
assert(pcre2.new('(\\d+)(\\w)')) 7 | assert(re:jit_compile(pcre2.JIT_COMPLETE)) 8 | end 9 | -- collects every match of the pattern, restarting each search at the previous match's tail[1] 10 | local function test_match() 11 | local sbj = 'abc081abc134klj567' 12 | local re = assert(pcre2.new('(\\d+)(\\w)')) 13 | local head, tail, err = re:match(sbj) 14 | local matches = {} 15 | 16 | assert(not err, err) 17 | while head do 18 | local list = {} 19 | for i = 1, #head do 20 | list[i] = sbj:sub(head[i], tail[i]) 21 | end 22 | matches[#matches + 1] = list 23 | 24 | head, tail, err = re:match(sbj, tail[1]) 25 | assert(not err, err) 26 | end 27 | -- each expected entry is { whole match, capture 1, capture 2 } 28 | assert(dump(matches) == dump({ 29 | { 30 | '081a', -- (\\d+)(\\w) 31 | '081', -- (\\d+) 32 | 'a', -- (\\w) 33 | }, 34 | { 35 | '134k', 36 | '134', 37 | 'k', 38 | }, 39 | { 40 | '567', 41 | '56', 42 | '7', 43 | }, 44 | })) 45 | end 46 | 47 | test_jit_compile() 48 | test_match() 49 | 50 | --------------------------------------------------------------------------------