├── .gitignore ├── .travis.yml ├── BUILD.bazel ├── LICENSE ├── README.md ├── build_lib.sh ├── libpcre_darwin_arm64.a ├── libpcre_darwin_x86_64.a ├── libpcre_linux.a ├── pcre-8.42.tar.gz ├── pcre.go ├── pcre.h ├── pcre_fallback.h ├── pcre_test.go ├── platform_darwin_amd64.go ├── platform_darwin_arm64.go └── platform_linux.go /.gitignore: -------------------------------------------------------------------------------- 1 | *.[0-9] 2 | *.cgo?.* 3 | *.o 4 | .DS_Store 5 | [0-9].out 6 | _cgo_* 7 | _obj 8 | _test 9 | _testmain.go 10 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | sudo: false 3 | addons: 4 | apt: 5 | packages: 6 | - libpcre++-dev 7 | go: 8 | - 1.3.3 9 | - 1.4.1 10 | - 1.4.2 11 | - 1.4.3 12 | - 1.5.1 13 | script: 14 | - go test -v ./... 15 | -------------------------------------------------------------------------------- /BUILD.bazel: -------------------------------------------------------------------------------- 1 | 2 | go_library( 3 | name = "go_default_library", 4 | srcs = [ 5 | "pcre.go", 6 | "pcre.h", 7 | "pcre_fallback.h", 8 | "platform_darwin_amd64.go", 9 | "platform_darwin_arm64.go", 10 | "platform_linux.go", 11 | ], 12 | clinkopts = [], # keep 13 | cdeps = [":libpc"], 14 | cgo = True, 15 | importpath = "github.com/rubrikinc/go-pcre", 16 | visibility = ["//visibility:public"], 17 | ) 18 | 19 | cc_import( 20 | name = "libpc", 21 | hdrs = ["pcre.h"], 22 | static_library = select({ 23 | "@io_bazel_rules_go//go/platform:darwin_amd64": ":libpcre_darwin_x86_64.a", 24 | "@io_bazel_rules_go//go/platform:darwin_arm64": ":libpcre_darwin_arm64.a", 25 | "@io_bazel_rules_go//go/platform:linux": ":libpcre_linux.a", 26 | }), 27 | alwayslink = 1, 28 | ) 29 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | The Go interface for PCRE is licensed according to the terms below. 2 | Note that PCRE and the Go implementation come with different terms. 3 | 4 | Copyright (c) 2011 Florian Weimer. All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are 8 | met: 9 | 10 | * Redistributions of source code must retain the above copyright 11 | notice, this list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in the 15 | documentation and/or other materials provided with the distribution. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # go-pcre 2 | 3 | [![GoDoc](https://godoc.org/github.com/gijsbers/go-pcre?status.svg)](https://godoc.org/github.com/gijsbers/go-pcre) 4 | 5 | This is a Go language package providing support for 6 | Perl Compatible Regular Expressions (PCRE). 7 | 8 | ## Installation 9 | 10 | Install the package for Debian as follows: 11 | 12 | sudo apt-get install libpcre++-dev 13 | go get github.com/gijsbers/go-pcre 14 | 15 | ## Usage 16 | 17 | Go programs that depend on this package should import 18 | this package as follows to allow automatic downloading: 19 | 20 | import "github.com/gijsbers/go-pcre" 21 | 22 | ## Upgrading 23 | 24 | To upgrade static libraries, run the following script on Linux and Mac to create the necessary static libs. 25 | 26 | ./build_lib.sh 27 | 28 | The static library will be one of `libpcre_darwin_x86_64.a`, `libpcre_darwin_arm64.a` or `libpcre_linux.a`, depending on the platform the script is run on. 29 | 30 | The library is compiled with the following options: 31 | ``` 32 | --enable-jit 33 | --enable-utf 34 | --disable-shared 35 | --disable-cpp 36 | --enable-newline-is-any 37 | --with-match-limit=500000 38 | --with-match-limit-recursion=50000 39 | ``` 40 | 41 | ## History 42 | 43 | This is a clone of 44 | [golang-pkg-pcre](http://git.enyo.de/fw/debian/golang-pkg-pcre.git) 45 | by Florian Weimer, which has been placed on Github by Glenn Brown, 46 | so it can be fetched automatically by Go's package installer. 47 | 48 | Glenn Brown added `FindIndex()` and `ReplaceAll()` 49 | to mimic functions in Go's default regexp package. 50 | 51 | Mathieu Payeur Levallois added `Matcher.ExtractString()`. 52 | 53 | Malte Nuhn added `GroupIndices()` to retrieve positions of a matching group. 54 | 55 | Chandra Sekar S added `Index()` and stopped invoking `Match()` twice in `FindIndex()`. 56 | 57 | Misakwa added support for `pkg-config` to locate `libpcre`. 
58 | 59 | Yann Ramin added `ReplaceAllString()` and changed `Compile()` return type to `error`. 60 | 61 | Nikolay Sivko modified `name2index()` to return error instead of panic. 62 | 63 | Harry Waye exposed raw `pcre_exec`. 64 | 65 | Hazzadous added partial match support. 66 | 67 | Pavel Gryaznov added support for JIT compilation. 68 | -------------------------------------------------------------------------------- /build_lib.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | TEMP=$(mktemp -d) 3 | SRC="pcre-8.42" 4 | echo "Using temp directory $TEMP to build $SRC" 5 | ( 6 | cp "$SRC.tar.gz" "$TEMP" 7 | cd "$TEMP" 8 | tar -xf "$SRC.tar.gz" 9 | ( 10 | cd "$SRC" 11 | ./configure \ 12 | --enable-jit \ 13 | --enable-utf \ 14 | --disable-shared \ 15 | --disable-cpp \ 16 | --enable-newline-is-any \ 17 | --with-match-limit=500000 \ 18 | --with-match-limit-recursion=50000 19 | make 20 | ) 21 | ) 22 | PLATFORM="$(uname -sm)" 23 | case "${PLATFORM}" in 24 | Linux*) OUTPUT=libpcre_linux.a;; 25 | "Darwin x86_64"*) OUTPUT=libpcre_darwin_x86_64.a;; 26 | "Darwin arm64"*) OUTPUT=libpcre_darwin_arm64.a;; 27 | *) OUTPUT=libpcre.a 28 | esac 29 | cp "$TEMP/$SRC/.libs/libpcre.a" "$OUTPUT" 30 | echo "Copied static library to $OUTPUT" 31 | rm -rf "$TEMP" 32 | -------------------------------------------------------------------------------- /libpcre_darwin_arm64.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rubrikinc/go-pcre/b732e5754d4b68da0fb672476c2626b670e4373b/libpcre_darwin_arm64.a -------------------------------------------------------------------------------- /libpcre_darwin_x86_64.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rubrikinc/go-pcre/b732e5754d4b68da0fb672476c2626b670e4373b/libpcre_darwin_x86_64.a -------------------------------------------------------------------------------- 
/libpcre_linux.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rubrikinc/go-pcre/b732e5754d4b68da0fb672476c2626b670e4373b/libpcre_linux.a -------------------------------------------------------------------------------- /pcre-8.42.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rubrikinc/go-pcre/b732e5754d4b68da0fb672476c2626b670e4373b/pcre-8.42.tar.gz -------------------------------------------------------------------------------- /pcre.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 Florian Weimer. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // 10 | // * Redistributions in binary form must reproduce the above copyright 11 | // notice, this list of conditions and the following disclaimer in the 12 | // documentation and/or other materials provided with the distribution. 13 | // 14 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 15 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 16 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 17 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 18 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 20 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | 26 | // Package pcre provides access to the Perl Compatible Regular 27 | // Expression library, PCRE. 28 | // 29 | // It implements two main types, Regexp and Matcher. Regexp objects 30 | // store a compiled regular expression. They consist of two immutable 31 | // parts: pcre and pcre_extra. Compile()/MustCompile() initialize pcre. 32 | // Calling Study() on a compiled Regexp initializes pcre_extra. 33 | // Compilation of regular expressions using Compile or MustCompile is 34 | // slightly expensive, so these objects should be kept and reused, 35 | // instead of compiling them from scratch for each matching attempt. 36 | // CompileJIT and MustCompileJIT are way more expensive, because they 37 | // run Study() after compiling a Regexp, but they tend to give 38 | // much better performance: 39 | // http://sljit.sourceforge.net/regex_perf.html 40 | // 41 | // Matcher objects keep the results of a match against a []byte or 42 | // string subject. The Group and GroupString functions provide access 43 | // to capture groups; both versions work no matter if the subject was a 44 | // []byte or string, but the version with the matching type is slightly 45 | // more efficient. 46 | // 47 | // Matcher objects contain some temporary space and refer to the original 48 | // subject. They are mutable and can be reused (using Match, 49 | // MatchString, Reset or ResetString). 
50 | // 51 | // For details on the regular expression language implemented by this 52 | // package and the flags defined below, see the PCRE documentation. 53 | // http://www.pcre.org/pcre.txt 54 | package pcre 55 | 56 | // #include 57 | // #include "./pcre.h" 58 | // #include "./pcre_fallback.h" 59 | // static inline void pcre_free_stub(void *re) { 60 | // pcre_free(re); 61 | // } 62 | import "C" 63 | 64 | import ( 65 | "errors" 66 | "fmt" 67 | "strconv" 68 | "unsafe" 69 | ) 70 | 71 | // Flags for Compile and Match functions. 72 | const ( 73 | ANCHORED = C.PCRE_ANCHORED 74 | BSR_ANYCRLF = C.PCRE_BSR_ANYCRLF 75 | BSR_UNICODE = C.PCRE_BSR_UNICODE 76 | NEWLINE_ANY = C.PCRE_NEWLINE_ANY 77 | NEWLINE_ANYCRLF = C.PCRE_NEWLINE_ANYCRLF 78 | NEWLINE_CR = C.PCRE_NEWLINE_CR 79 | NEWLINE_CRLF = C.PCRE_NEWLINE_CRLF 80 | NEWLINE_LF = C.PCRE_NEWLINE_LF 81 | NO_START_OPTIMIZE = C.PCRE_NO_START_OPTIMIZE 82 | NO_UTF8_CHECK = C.PCRE_NO_UTF8_CHECK 83 | ) 84 | 85 | // Flags for Compile functions 86 | const ( 87 | CASELESS = C.PCRE_CASELESS 88 | DOLLAR_ENDONLY = C.PCRE_DOLLAR_ENDONLY 89 | DOTALL = C.PCRE_DOTALL 90 | DUPNAMES = C.PCRE_DUPNAMES 91 | EXTENDED = C.PCRE_EXTENDED 92 | EXTRA = C.PCRE_EXTRA 93 | FIRSTLINE = C.PCRE_FIRSTLINE 94 | JAVASCRIPT_COMPAT = C.PCRE_JAVASCRIPT_COMPAT 95 | MULTILINE = C.PCRE_MULTILINE 96 | NEVER_UTF = C.PCRE_NEVER_UTF 97 | NO_AUTO_CAPTURE = C.PCRE_NO_AUTO_CAPTURE 98 | UNGREEDY = C.PCRE_UNGREEDY 99 | UTF8 = C.PCRE_UTF8 100 | UCP = C.PCRE_UCP 101 | ) 102 | 103 | // Flags for Match functions 104 | const ( 105 | NOTBOL = C.PCRE_NOTBOL 106 | NOTEOL = C.PCRE_NOTEOL 107 | NOTEMPTY = C.PCRE_NOTEMPTY 108 | NOTEMPTY_ATSTART = C.PCRE_NOTEMPTY_ATSTART 109 | PARTIAL_HARD = C.PCRE_PARTIAL_HARD 110 | PARTIAL_SOFT = C.PCRE_PARTIAL_SOFT 111 | ) 112 | 113 | // Flags for Study function 114 | const ( 115 | STUDY_JIT_COMPILE = C.PCRE_STUDY_JIT_COMPILE 116 | STUDY_JIT_PARTIAL_SOFT_COMPILE = C.PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE 117 | STUDY_JIT_PARTIAL_HARD_COMPILE = 
C.PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE 118 | ) 119 | 120 | // Exec-time and get/set-time error codes 121 | const ( 122 | ERROR_NOMATCH = C.PCRE_ERROR_NOMATCH 123 | ERROR_NULL = C.PCRE_ERROR_NULL 124 | ERROR_BADOPTION = C.PCRE_ERROR_BADOPTION 125 | ERROR_BADMAGIC = C.PCRE_ERROR_BADMAGIC 126 | ERROR_UNKNOWN_OPCODE = C.PCRE_ERROR_UNKNOWN_OPCODE 127 | ERROR_UNKNOWN_NODE = C.PCRE_ERROR_UNKNOWN_NODE 128 | ERROR_NOMEMORY = C.PCRE_ERROR_NOMEMORY 129 | ERROR_NOSUBSTRING = C.PCRE_ERROR_NOSUBSTRING 130 | ERROR_MATCHLIMIT = C.PCRE_ERROR_MATCHLIMIT 131 | ERROR_CALLOUT = C.PCRE_ERROR_CALLOUT 132 | ERROR_BADUTF8 = C.PCRE_ERROR_BADUTF8 133 | ERROR_BADUTF8_OFFSET = C.PCRE_ERROR_BADUTF8_OFFSET 134 | ERROR_PARTIAL = C.PCRE_ERROR_PARTIAL 135 | ERROR_BADPARTIAL = C.PCRE_ERROR_BADPARTIAL 136 | ERROR_RECURSIONLIMIT = C.PCRE_ERROR_RECURSIONLIMIT 137 | ERROR_INTERNAL = C.PCRE_ERROR_INTERNAL 138 | ERROR_BADCOUNT = C.PCRE_ERROR_BADCOUNT 139 | ERROR_JIT_STACKLIMIT = C.PCRE_ERROR_JIT_STACKLIMIT 140 | ) 141 | 142 | // Regexp holds a reference to a compiled regular expression. 143 | // Use Compile or MustCompile to create such objects. 144 | // Use FreeRegexp to free memory when done with the struct. 145 | type Regexp struct { 146 | ptr *C.pcre 147 | extra *C.pcre_extra 148 | } 149 | 150 | // Number of bytes in the compiled pattern 151 | func pcreSize(ptr *C.pcre) (size C.size_t) { 152 | C.pcre_fullinfo(ptr, nil, C.PCRE_INFO_SIZE, unsafe.Pointer(&size)) 153 | return 154 | } 155 | 156 | // Number of capture groups 157 | func pcreGroups(ptr *C.pcre) (count C.int) { 158 | C.pcre_fullinfo(ptr, nil, 159 | C.PCRE_INFO_CAPTURECOUNT, unsafe.Pointer(&count)) 160 | return 161 | } 162 | 163 | // Free c allocated memory related to regexp. 164 | func (re *Regexp) FreeRegexp() { 165 | // pcre_free is a function pointer, call a stub that calls it. 
166 | if re.ptr != nil { 167 | C.pcre_free_stub(unsafe.Pointer(re.ptr)) 168 | re.ptr = nil 169 | } 170 | if re.extra != nil { 171 | C.pcre_free_study(re.extra) 172 | re.extra = nil 173 | } 174 | } 175 | 176 | // Compile the pattern and return a compiled regexp. 177 | // If compilation fails, the second return value holds a *CompileError. 178 | func Compile(pattern string, flags int) (Regexp, error) { 179 | re := Regexp{} 180 | pattern1 := C.CString(pattern) 181 | defer C.free(unsafe.Pointer(pattern1)) 182 | if clen := int(C.strlen(pattern1)); clen != len(pattern) { 183 | return Regexp{}, &CompileError{ 184 | Pattern: pattern, 185 | Message: "NUL byte in pattern", 186 | Offset: clen, 187 | } 188 | } 189 | var errptr *C.char 190 | var erroffset C.int 191 | re.ptr = C.pcre_compile(pattern1, C.int(flags), &errptr, &erroffset, nil) 192 | if re.ptr == nil { 193 | return Regexp{}, &CompileError{ 194 | Pattern: pattern, 195 | Message: C.GoString(errptr), 196 | Offset: int(erroffset), 197 | } 198 | } 199 | return re, nil 200 | } 201 | 202 | // CompileJIT is a combination of Compile and Study. It first compiles 203 | // the pattern and if this succeeds calls Study on the compiled pattern. 204 | // comFlags are Compile flags, jitFlags are study flags. 205 | // If compilation fails, the second return value holds a *CompileError. 206 | func CompileJIT(pattern string, comFlags, jitFlags int) (Regexp, error) { 207 | re, err := Compile(pattern, comFlags) 208 | if err == nil { 209 | err = (&re).Study(jitFlags) 210 | } 211 | return re, err 212 | } 213 | 214 | // MustCompile compiles the pattern. If compilation fails, panic. 215 | func MustCompile(pattern string, flags int) (re Regexp) { 216 | re, err := Compile(pattern, flags) 217 | if err != nil { 218 | panic(err) 219 | } 220 | return 221 | } 222 | 223 | // MustCompileJIT compiles and studies the pattern. On failure it panics. 
224 | func MustCompileJIT(pattern string, comFlags, jitFlags int) (re Regexp) { 225 | re, err := CompileJIT(pattern, comFlags, jitFlags) 226 | if err != nil { 227 | panic(err) 228 | } 229 | return 230 | } 231 | 232 | // Study adds Just-In-Time compilation to a Regexp. This may give a huge 233 | // speed boost when matching. If an error occurs, return value is non-nil. 234 | // Flags optionally specifies JIT compilation options for partial matches. 235 | func (re *Regexp) Study(flags int) error { 236 | if re.extra != nil { 237 | return fmt.Errorf("Study: Regexp has already been optimized") 238 | } 239 | if flags == 0 { 240 | flags = STUDY_JIT_COMPILE 241 | } 242 | 243 | var err *C.char 244 | re.extra = C.pcre_study(re.ptr, C.int(flags), &err) 245 | if err != nil { 246 | return fmt.Errorf("%s", C.GoString(err)) 247 | } 248 | if re.extra == nil { 249 | // Studying the pattern may not produce useful information. 250 | return nil 251 | } 252 | return nil 253 | } 254 | 255 | // Groups returns the number of capture groups in the compiled pattern. 256 | func (re Regexp) Groups() int { 257 | if re.ptr == nil { 258 | panic("Regexp.Groups: uninitialized") 259 | } 260 | return int(pcreGroups(re.ptr)) 261 | } 262 | 263 | // Matcher objects provide a place for storing match results. 264 | // They can be created by the Matcher and MatcherString functions, 265 | // or they can be initialized with Reset or ResetString. 266 | type Matcher struct { 267 | re Regexp 268 | groups int 269 | ovector []C.int // scratch space for capture offsets 270 | matches bool // last match was successful 271 | partial bool // was the last match a partial match? 272 | subjects string // one of these fields is set to record the subject, 273 | subjectb []byte // so that Group/GroupString can return slices 274 | err error 275 | } 276 | 277 | // NewMatcher creates a new matcher object for the given Regexp. 
278 | func (re Regexp) NewMatcher() (m *Matcher) { 279 | m = new(Matcher) 280 | m.Init(&re) 281 | return 282 | } 283 | 284 | // Matcher creates a new matcher object, with the byte slice as subject. 285 | // It also starts a first match on subject. Test for success with Matches(). 286 | func (re Regexp) Matcher(subject []byte, flags int) (m *Matcher) { 287 | m = re.NewMatcher() 288 | m.Match(subject, flags) 289 | return 290 | } 291 | 292 | // MatcherString creates a new matcher, with the specified subject string. 293 | // It also starts a first match on subject. Test for success with Matches(). 294 | func (re Regexp) MatcherString(subject string, flags int) (m *Matcher) { 295 | m = re.NewMatcher() 296 | m.MatchString(subject, flags) 297 | return 298 | 299 | } 300 | 301 | // Reset switches the matcher object to the specified regexp and subject. 302 | // It also starts a first match on subject. 303 | func (m *Matcher) Reset(re Regexp, subject []byte, flags int) bool { 304 | m.Init(&re) 305 | return m.Match(subject, flags) 306 | } 307 | 308 | // ResetString switches the matcher object to the given regexp and subject. 309 | // It also starts a first match on subject. 310 | func (m *Matcher) ResetString(re Regexp, subject string, flags int) bool { 311 | m.Init(&re) 312 | return m.MatchString(subject, flags) 313 | } 314 | 315 | // Init binds an existing Matcher object to the given Regexp. 316 | func (m *Matcher) Init(re *Regexp) { 317 | if re.ptr == nil { 318 | panic("Matcher.Init: uninitialized") 319 | } 320 | m.matches = false 321 | m.err = nil 322 | if m.re.ptr != nil && m.re.ptr == re.ptr { 323 | // Skip group count extraction if the matcher has 324 | // already been initialized with the same regular 325 | // expression. 
326 | return 327 | } 328 | m.re = *re 329 | m.groups = re.Groups() 330 | if ovectorlen := 3 * (1 + m.groups); len(m.ovector) < ovectorlen { 331 | m.ovector = make([]C.int, ovectorlen) 332 | } 333 | } 334 | 335 | // Err returns first error encountered by Matcher. 336 | func (m *Matcher) Err() error { 337 | return m.err 338 | } 339 | 340 | var nullbyte = []byte{0} 341 | 342 | // Match tries to match the specified byte slice to 343 | // the current pattern by calling Exec and collects the result. 344 | // Returns true if the match succeeds. 345 | // Match is a no-op if err is not nil. 346 | func (m *Matcher) Match(subject []byte, flags int) bool { 347 | if m.err != nil { 348 | return false 349 | } 350 | if m.re.ptr == nil { 351 | panic("Matcher.Match: uninitialized") 352 | } 353 | rc := m.Exec(subject, flags) 354 | m.matches, m.err = matched(rc) 355 | m.partial = (rc == ERROR_PARTIAL) 356 | return m.matches 357 | } 358 | 359 | // MatchString tries to match the specified subject string to 360 | // the current pattern by calling ExecString and collects the result. 361 | // Returns true if the match succeeds. 362 | func (m *Matcher) MatchString(subject string, flags int) bool { 363 | if m.err != nil { 364 | return false 365 | } 366 | if m.re.ptr == nil { 367 | panic("Matcher.MatchString: uninitialized") 368 | } 369 | rc := m.ExecString(subject, flags) 370 | m.matches, m.err = matched(rc) 371 | m.partial = (rc == ERROR_PARTIAL) 372 | return m.matches 373 | } 374 | 375 | // Exec tries to match the specified byte slice to 376 | // the current pattern. Returns the raw pcre_exec error code. 
377 | func (m *Matcher) Exec(subject []byte, flags int) int { 378 | if m.re.ptr == nil { 379 | panic("Matcher.Exec: uninitialized") 380 | } 381 | length := len(subject) 382 | m.subjects = "" 383 | m.subjectb = subject 384 | if length == 0 { 385 | subject = nullbyte // make first character adressable 386 | } 387 | subjectptr := (*C.char)(unsafe.Pointer(&subject[0])) 388 | return m.exec(subjectptr, length, flags) 389 | } 390 | 391 | // ExecString tries to match the specified subject string to 392 | // the current pattern. It returns the raw pcre_exec error code. 393 | func (m *Matcher) ExecString(subject string, flags int) int { 394 | if m.re.ptr == nil { 395 | panic("Matcher.ExecString: uninitialized") 396 | } 397 | length := len(subject) 398 | m.subjects = subject 399 | m.subjectb = nil 400 | if length == 0 { 401 | subject = "\000" // make first character addressable 402 | } 403 | // The following is a non-portable kludge to avoid a copy 404 | subjectptr := *(**C.char)(unsafe.Pointer(&subject)) 405 | return m.exec(subjectptr, length, flags) 406 | } 407 | 408 | func (m *Matcher) exec(subjectptr *C.char, length, flags int) int { 409 | rc := C.pcre_exec(m.re.ptr, m.re.extra, 410 | subjectptr, C.int(length), 411 | 0, C.int(flags), &m.ovector[0], C.int(len(m.ovector))) 412 | return int(rc) 413 | } 414 | 415 | // matched checks the return code of a pattern match for success. 416 | func matched(rc int) (bool, error) { 417 | switch { 418 | case rc >= 0 || rc == C.PCRE_ERROR_PARTIAL: 419 | return true, nil 420 | case rc == C.PCRE_ERROR_NOMATCH: 421 | return false, nil 422 | case rc == C.PCRE_ERROR_BADOPTION: 423 | return false, errors.New("PCRE.Match: invalid option flag") 424 | } 425 | err := errors.New( 426 | "unexpected return code from pcre_exec: " + strconv.Itoa(rc), 427 | ) 428 | return false, err 429 | } 430 | 431 | // Matches returns true if a previous call to Matcher, MatcherString, Reset, 432 | // ResetString, Match or MatchString succeeded. 
433 | func (m *Matcher) Matches() bool { 434 | return m.matches 435 | } 436 | 437 | // Partial returns true if a previous call to Matcher, MatcherString, Reset, 438 | // ResetString, Match or MatchString found a partial match. 439 | func (m *Matcher) Partial() bool { 440 | return m.partial 441 | } 442 | 443 | // Groups returns the number of groups in the current pattern. 444 | func (m *Matcher) Groups() int { 445 | return m.groups 446 | } 447 | 448 | // Present returns true if the numbered capture group is present in the last 449 | // match (performed by Matcher, MatcherString, Reset, ResetString, 450 | // Match, or MatchString). Group numbers start at 1. A capture group 451 | // can be present and match the empty string. 452 | func (m *Matcher) Present(group int) bool { 453 | return m.ovector[2*group] >= 0 454 | } 455 | 456 | // Group returns the numbered capture group of the last match (performed by 457 | // Matcher, MatcherString, Reset, ResetString, Match, or MatchString). 458 | // Group 0 is the part of the subject which matches the whole pattern; 459 | // the first actual capture group is numbered 1. Capture groups which 460 | // are not present return a nil slice. 461 | func (m *Matcher) Group(group int) []byte { 462 | start := m.ovector[2*group] 463 | end := m.ovector[2*group+1] 464 | if start >= 0 { 465 | if m.subjectb != nil { 466 | return m.subjectb[start:end] 467 | } 468 | return []byte(m.subjects[start:end]) 469 | } 470 | return nil 471 | } 472 | 473 | // Extract returns a slice of byte slices for a single match. 474 | // The first byte slice contains the complete match. 475 | // Subsequent byte slices contain the captured groups. 476 | // If there was no match then nil is returned. 
477 | func (m *Matcher) Extract() [][]byte { 478 | if !m.matches { 479 | return nil 480 | } 481 | extract := make([][]byte, m.groups+1) 482 | extract[0] = m.subjectb 483 | for i := 1; i <= m.groups; i++ { 484 | x0 := m.ovector[2*i] 485 | x1 := m.ovector[2*i+1] 486 | extract[i] = m.subjectb[x0:x1] 487 | } 488 | return extract 489 | } 490 | 491 | // ExtractString returns a slice of strings for a single match. 492 | // The first string contains the complete match. 493 | // Subsequent strings in the slice contain the captured groups. 494 | // If there was no match then nil is returned. 495 | func (m *Matcher) ExtractString() []string { 496 | if !m.matches { 497 | return nil 498 | } 499 | extract := make([]string, m.groups+1) 500 | extract[0] = m.subjects 501 | for i := 1; i <= m.groups; i++ { 502 | x0 := m.ovector[2*i] 503 | x1 := m.ovector[2*i+1] 504 | extract[i] = m.subjects[x0:x1] 505 | } 506 | return extract 507 | } 508 | 509 | // GroupIndices returns the numbered capture group positions of the last 510 | // match (performed by Matcher, MatcherString, Reset, ResetString, Match, 511 | // or MatchString). Group 0 is the part of the subject which matches 512 | // the whole pattern; the first actual capture group is numbered 1. 513 | // Capture groups which are not present return a nil slice. 514 | func (m *Matcher) GroupIndices(group int) []int { 515 | start := m.ovector[2*group] 516 | end := m.ovector[2*group+1] 517 | if start >= 0 { 518 | return []int{int(start), int(end)} 519 | } 520 | return nil 521 | } 522 | 523 | // GroupString returns the numbered capture group as a string. Group 0 524 | // is the part of the subject which matches the whole pattern; the first 525 | // actual capture group is numbered 1. Capture groups which are not 526 | // present return an empty string. 
527 | func (m *Matcher) GroupString(group int) string { 528 | start := m.ovector[2*group] 529 | end := m.ovector[2*group+1] 530 | if start >= 0 { 531 | if m.subjectb != nil { 532 | return string(m.subjectb[start:end]) 533 | } 534 | return m.subjects[start:end] 535 | } 536 | return "" 537 | } 538 | 539 | // Index returns the start and end of the first match, if a previous 540 | // call to Matcher, MatcherString, Reset, ResetString, Match or 541 | // MatchString succeeded. loc[0] is the start and loc[1] is the end. 542 | func (m *Matcher) Index() (loc []int) { 543 | if !m.matches { 544 | return nil 545 | } 546 | loc = []int{int(m.ovector[0]), int(m.ovector[1])} 547 | return 548 | } 549 | 550 | // name2index converts a group name to its group index number. 551 | func (m *Matcher) name2index(name string) (int, error) { 552 | if m.re.ptr == nil { 553 | return 0, fmt.Errorf("Matcher.Named: uninitialized") 554 | } 555 | name1 := C.CString(name) 556 | defer C.free(unsafe.Pointer(name1)) 557 | group := int(C.pcre_get_stringnumber(m.re.ptr, name1)) 558 | if group < 0 { 559 | return group, fmt.Errorf("Matcher.Named: unknown name: " + name) 560 | } 561 | return group, nil 562 | } 563 | 564 | // Named returns the value of the named capture group. 565 | // This is a nil slice if the capture group is not present. 566 | // If the name does not refer to a group then error is non-nil. 567 | func (m *Matcher) Named(group string) ([]byte, error) { 568 | groupNum, err := m.name2index(group) 569 | if err != nil { 570 | return []byte{}, err 571 | } 572 | return m.Group(groupNum), nil 573 | } 574 | 575 | // NamedString returns the value of the named capture group, 576 | // or an empty string if the capture group is not present. 577 | // If the name does not refer to a group then error is non-nil. 
578 | func (m *Matcher) NamedString(group string) (string, error) { 579 | groupNum, err := m.name2index(group) 580 | if err != nil { 581 | return "", err 582 | } 583 | return m.GroupString(groupNum), nil 584 | } 585 | 586 | // NamedPresent returns true if the named capture group is present. 587 | // If the name does not refer to a group then error is non-nil. 588 | func (m *Matcher) NamedPresent(group string) (bool, error) { 589 | groupNum, err := m.name2index(group) 590 | if err != nil { 591 | return false, err 592 | } 593 | return m.Present(groupNum), nil 594 | } 595 | 596 | // FindIndex returns the start and end of the first match, 597 | // or nil if no match. loc[0] is the start and loc[1] is the end. 598 | func (re *Regexp) FindIndex(bytes []byte, flags int) (loc []int) { 599 | m := re.Matcher(bytes, flags) 600 | if m.Matches() { 601 | loc = []int{int(m.ovector[0]), int(m.ovector[1])} 602 | return 603 | } 604 | return nil 605 | } 606 | 607 | // ReplaceAll returns a copy of a byte slice 608 | // where all pattern matches are replaced by repl. 609 | func (re Regexp) ReplaceAll(bytes, repl []byte, flags int) ([]byte, error) { 610 | m := re.Matcher(bytes, flags) 611 | r := []byte{} 612 | for m.matches { 613 | r = append(append(r, bytes[:m.ovector[0]]...), repl...) 614 | bytes = bytes[m.ovector[1]:] 615 | m.Match(bytes, flags) 616 | } 617 | return append(r, bytes...), m.err 618 | } 619 | 620 | // ReplaceAllString is equivalent to ReplaceAll with string return type. 621 | func (re Regexp) ReplaceAllString(in, repl string, flags int) (string, error) { 622 | str, err := re.ReplaceAll([]byte(in), []byte(repl), flags) 623 | return string(str), err 624 | } 625 | 626 | // Match holds details about a single successful regex match. 627 | type Match struct { 628 | Finding string // Text that was found. 629 | Loc []int // Index bounds for location of finding. 630 | } 631 | 632 | // FindAll finds all instances that match the regex. 
633 | func (re Regexp) FindAll(subject string, flags int) ([]Match, error) { 634 | matches := make([]Match, 0) 635 | m := re.MatcherString(subject, flags) 636 | offset := 0 637 | for m.Matches() { 638 | leftIdx := int(m.ovector[0]) + offset 639 | rightIdx := int(m.ovector[1]) + offset 640 | matches = append( 641 | matches, 642 | Match{ 643 | subject[leftIdx:rightIdx], 644 | []int{leftIdx, rightIdx}, 645 | }, 646 | ) 647 | offset += maxInt(1, int(m.ovector[1])) 648 | if offset < len(subject) { 649 | m.MatchString(subject[offset:], flags) 650 | } else { 651 | break 652 | } 653 | } 654 | return matches, m.err 655 | } 656 | 657 | // CompileError holds details about a compilation error, 658 | // as returned by the Compile function. The offset is 659 | // the byte position in the pattern string at which the 660 | // error was detected. 661 | type CompileError struct { 662 | Pattern string // The failed pattern 663 | Message string // The error message 664 | Offset int // Byte position of error 665 | } 666 | 667 | // Error converts a compile error to a string 668 | func (e *CompileError) Error() string { 669 | return e.Pattern + " (" + strconv.Itoa(e.Offset) + "): " + e.Message 670 | } 671 | 672 | func maxInt(a, b int) int { 673 | if a > b { 674 | return a 675 | } else { 676 | return b 677 | } 678 | } 679 | -------------------------------------------------------------------------------- /pcre.h: -------------------------------------------------------------------------------- 1 | /************************************************* 2 | * Perl-Compatible Regular Expressions * 3 | *************************************************/ 4 | 5 | /* This is the public header file for the PCRE library, to be #included by 6 | applications that call the PCRE functions. 
7 | 8 | Copyright (c) 1997-2014 University of Cambridge 9 | 10 | ----------------------------------------------------------------------------- 11 | Redistribution and use in source and binary forms, with or without 12 | modification, are permitted provided that the following conditions are met: 13 | 14 | * Redistributions of source code must retain the above copyright notice, 15 | this list of conditions and the following disclaimer. 16 | 17 | * Redistributions in binary form must reproduce the above copyright 18 | notice, this list of conditions and the following disclaimer in the 19 | documentation and/or other materials provided with the distribution. 20 | 21 | * Neither the name of the University of Cambridge nor the names of its 22 | contributors may be used to endorse or promote products derived from 23 | this software without specific prior written permission. 24 | 25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 26 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 29 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 | POSSIBILITY OF SUCH DAMAGE. 36 | ----------------------------------------------------------------------------- 37 | */ 38 | 39 | #ifndef _PCRE_H 40 | #define _PCRE_H 41 | 42 | /* The current PCRE version information. 
*/ 43 | 44 | #define PCRE_MAJOR @PCRE_MAJOR@ 45 | #define PCRE_MINOR @PCRE_MINOR@ 46 | #define PCRE_PRERELEASE @PCRE_PRERELEASE@ 47 | #define PCRE_DATE @PCRE_DATE@ 48 | 49 | /* When an application links to a PCRE DLL in Windows, the symbols that are 50 | imported have to be identified as such. When building PCRE, the appropriate 51 | export setting is defined in pcre_internal.h, which includes this file. So we 52 | don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */ 53 | 54 | #if defined(_WIN32) && !defined(PCRE_STATIC) 55 | # ifndef PCRE_EXP_DECL 56 | # define PCRE_EXP_DECL extern __declspec(dllimport) 57 | # endif 58 | # ifdef __cplusplus 59 | # ifndef PCRECPP_EXP_DECL 60 | # define PCRECPP_EXP_DECL extern __declspec(dllimport) 61 | # endif 62 | # ifndef PCRECPP_EXP_DEFN 63 | # define PCRECPP_EXP_DEFN __declspec(dllimport) 64 | # endif 65 | # endif 66 | #endif 67 | 68 | /* By default, we use the standard "extern" declarations. */ 69 | 70 | #ifndef PCRE_EXP_DECL 71 | # ifdef __cplusplus 72 | # define PCRE_EXP_DECL extern "C" 73 | # else 74 | # define PCRE_EXP_DECL extern 75 | # endif 76 | #endif 77 | 78 | #ifdef __cplusplus 79 | # ifndef PCRECPP_EXP_DECL 80 | # define PCRECPP_EXP_DECL extern 81 | # endif 82 | # ifndef PCRECPP_EXP_DEFN 83 | # define PCRECPP_EXP_DEFN 84 | # endif 85 | #endif 86 | 87 | /* Have to include stdlib.h in order to ensure that size_t is defined; 88 | it is needed here for malloc. */ 89 | 90 | #include <stdlib.h> 91 | 92 | /* Allow for C++ users */ 93 | 94 | #ifdef __cplusplus 95 | extern "C" { 96 | #endif 97 | 98 | /* Public options. Some are compile-time only, some are run-time only, and some 99 | are both. Most of the compile-time options are saved with the compiled regex so 100 | that they can be inspected during studying (and therefore JIT compiling). Note 101 | that pcre_study() has its own set of options. Originally, all the options 102 | defined here used distinct bits.
However, almost all the bits in a 32-bit word 103 | are now used, so in order to conserve them, option bits that were previously 104 | only recognized at matching time (i.e. by pcre_exec() or pcre_dfa_exec()) may 105 | also be used for compile-time options that affect only compiling and are not 106 | relevant for studying or JIT compiling. 107 | 108 | Some options for pcre_compile() change its behaviour but do not affect the 109 | behaviour of the execution functions. Other options are passed through to the 110 | execution functions and affect their behaviour, with or without affecting the 111 | behaviour of pcre_compile(). 112 | 113 | Options that can be passed to pcre_compile() are tagged Cx below, with these 114 | variants: 115 | 116 | C1 Affects compile only 117 | C2 Does not affect compile; affects exec, dfa_exec 118 | C3 Affects compile, exec, dfa_exec 119 | C4 Affects compile, exec, dfa_exec, study 120 | C5 Affects compile, exec, study 121 | 122 | Options that can be set for pcre_exec() and/or pcre_dfa_exec() are flagged with 123 | E and D, respectively. They take precedence over C3, C4, and C5 settings passed 124 | from pcre_compile(). Those that are compatible with JIT execution are flagged 125 | with J. 
*/ 126 | 127 | #define PCRE_CASELESS 0x00000001 /* C1 */ 128 | #define PCRE_MULTILINE 0x00000002 /* C1 */ 129 | #define PCRE_DOTALL 0x00000004 /* C1 */ 130 | #define PCRE_EXTENDED 0x00000008 /* C1 */ 131 | #define PCRE_ANCHORED 0x00000010 /* C4 E D */ 132 | #define PCRE_DOLLAR_ENDONLY 0x00000020 /* C2 */ 133 | #define PCRE_EXTRA 0x00000040 /* C1 */ 134 | #define PCRE_NOTBOL 0x00000080 /* E D J */ 135 | #define PCRE_NOTEOL 0x00000100 /* E D J */ 136 | #define PCRE_UNGREEDY 0x00000200 /* C1 */ 137 | #define PCRE_NOTEMPTY 0x00000400 /* E D J */ 138 | #define PCRE_UTF8 0x00000800 /* C4 ) */ 139 | #define PCRE_UTF16 0x00000800 /* C4 ) Synonyms */ 140 | #define PCRE_UTF32 0x00000800 /* C4 ) */ 141 | #define PCRE_NO_AUTO_CAPTURE 0x00001000 /* C1 */ 142 | #define PCRE_NO_UTF8_CHECK 0x00002000 /* C1 E D J ) */ 143 | #define PCRE_NO_UTF16_CHECK 0x00002000 /* C1 E D J ) Synonyms */ 144 | #define PCRE_NO_UTF32_CHECK 0x00002000 /* C1 E D J ) */ 145 | #define PCRE_AUTO_CALLOUT 0x00004000 /* C1 */ 146 | #define PCRE_PARTIAL_SOFT 0x00008000 /* E D J ) Synonyms */ 147 | #define PCRE_PARTIAL 0x00008000 /* E D J ) */ 148 | 149 | /* This pair use the same bit. */ 150 | #define PCRE_NEVER_UTF 0x00010000 /* C1 ) Overlaid */ 151 | #define PCRE_DFA_SHORTEST 0x00010000 /* D ) Overlaid */ 152 | 153 | /* This pair use the same bit. 
*/ 154 | #define PCRE_NO_AUTO_POSSESS 0x00020000 /* C1 ) Overlaid */ 155 | #define PCRE_DFA_RESTART 0x00020000 /* D ) Overlaid */ 156 | 157 | #define PCRE_FIRSTLINE 0x00040000 /* C3 */ 158 | #define PCRE_DUPNAMES 0x00080000 /* C1 */ 159 | #define PCRE_NEWLINE_CR 0x00100000 /* C3 E D */ 160 | #define PCRE_NEWLINE_LF 0x00200000 /* C3 E D */ 161 | #define PCRE_NEWLINE_CRLF 0x00300000 /* C3 E D */ 162 | #define PCRE_NEWLINE_ANY 0x00400000 /* C3 E D */ 163 | #define PCRE_NEWLINE_ANYCRLF 0x00500000 /* C3 E D */ 164 | #define PCRE_BSR_ANYCRLF 0x00800000 /* C3 E D */ 165 | #define PCRE_BSR_UNICODE 0x01000000 /* C3 E D */ 166 | #define PCRE_JAVASCRIPT_COMPAT 0x02000000 /* C5 */ 167 | #define PCRE_NO_START_OPTIMIZE 0x04000000 /* C2 E D ) Synonyms */ 168 | #define PCRE_NO_START_OPTIMISE 0x04000000 /* C2 E D ) */ 169 | #define PCRE_PARTIAL_HARD 0x08000000 /* E D J */ 170 | #define PCRE_NOTEMPTY_ATSTART 0x10000000 /* E D J */ 171 | #define PCRE_UCP 0x20000000 /* C3 */ 172 | 173 | /* Exec-time and get/set-time error codes */ 174 | 175 | #define PCRE_ERROR_NOMATCH (-1) 176 | #define PCRE_ERROR_NULL (-2) 177 | #define PCRE_ERROR_BADOPTION (-3) 178 | #define PCRE_ERROR_BADMAGIC (-4) 179 | #define PCRE_ERROR_UNKNOWN_OPCODE (-5) 180 | #define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */ 181 | #define PCRE_ERROR_NOMEMORY (-6) 182 | #define PCRE_ERROR_NOSUBSTRING (-7) 183 | #define PCRE_ERROR_MATCHLIMIT (-8) 184 | #define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */ 185 | #define PCRE_ERROR_BADUTF8 (-10) /* Same for 8/16/32 */ 186 | #define PCRE_ERROR_BADUTF16 (-10) /* Same for 8/16/32 */ 187 | #define PCRE_ERROR_BADUTF32 (-10) /* Same for 8/16/32 */ 188 | #define PCRE_ERROR_BADUTF8_OFFSET (-11) /* Same for 8/16 */ 189 | #define PCRE_ERROR_BADUTF16_OFFSET (-11) /* Same for 8/16 */ 190 | #define PCRE_ERROR_PARTIAL (-12) 191 | #define PCRE_ERROR_BADPARTIAL (-13) 192 | #define PCRE_ERROR_INTERNAL (-14) 193 | #define PCRE_ERROR_BADCOUNT (-15) 194 | #define 
PCRE_ERROR_DFA_UITEM (-16) 195 | #define PCRE_ERROR_DFA_UCOND (-17) 196 | #define PCRE_ERROR_DFA_UMLIMIT (-18) 197 | #define PCRE_ERROR_DFA_WSSIZE (-19) 198 | #define PCRE_ERROR_DFA_RECURSE (-20) 199 | #define PCRE_ERROR_RECURSIONLIMIT (-21) 200 | #define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */ 201 | #define PCRE_ERROR_BADNEWLINE (-23) 202 | #define PCRE_ERROR_BADOFFSET (-24) 203 | #define PCRE_ERROR_SHORTUTF8 (-25) 204 | #define PCRE_ERROR_SHORTUTF16 (-25) /* Same for 8/16 */ 205 | #define PCRE_ERROR_RECURSELOOP (-26) 206 | #define PCRE_ERROR_JIT_STACKLIMIT (-27) 207 | #define PCRE_ERROR_BADMODE (-28) 208 | #define PCRE_ERROR_BADENDIANNESS (-29) 209 | #define PCRE_ERROR_DFA_BADRESTART (-30) 210 | #define PCRE_ERROR_JIT_BADOPTION (-31) 211 | #define PCRE_ERROR_BADLENGTH (-32) 212 | #define PCRE_ERROR_UNSET (-33) 213 | 214 | /* Specific error codes for UTF-8 validity checks */ 215 | 216 | #define PCRE_UTF8_ERR0 0 217 | #define PCRE_UTF8_ERR1 1 218 | #define PCRE_UTF8_ERR2 2 219 | #define PCRE_UTF8_ERR3 3 220 | #define PCRE_UTF8_ERR4 4 221 | #define PCRE_UTF8_ERR5 5 222 | #define PCRE_UTF8_ERR6 6 223 | #define PCRE_UTF8_ERR7 7 224 | #define PCRE_UTF8_ERR8 8 225 | #define PCRE_UTF8_ERR9 9 226 | #define PCRE_UTF8_ERR10 10 227 | #define PCRE_UTF8_ERR11 11 228 | #define PCRE_UTF8_ERR12 12 229 | #define PCRE_UTF8_ERR13 13 230 | #define PCRE_UTF8_ERR14 14 231 | #define PCRE_UTF8_ERR15 15 232 | #define PCRE_UTF8_ERR16 16 233 | #define PCRE_UTF8_ERR17 17 234 | #define PCRE_UTF8_ERR18 18 235 | #define PCRE_UTF8_ERR19 19 236 | #define PCRE_UTF8_ERR20 20 237 | #define PCRE_UTF8_ERR21 21 238 | #define PCRE_UTF8_ERR22 22 /* Unused (was non-character) */ 239 | 240 | /* Specific error codes for UTF-16 validity checks */ 241 | 242 | #define PCRE_UTF16_ERR0 0 243 | #define PCRE_UTF16_ERR1 1 244 | #define PCRE_UTF16_ERR2 2 245 | #define PCRE_UTF16_ERR3 3 246 | #define PCRE_UTF16_ERR4 4 /* Unused (was non-character) */ 247 | 248 | /* Specific error codes for UTF-32 
validity checks */ 249 | 250 | #define PCRE_UTF32_ERR0 0 251 | #define PCRE_UTF32_ERR1 1 252 | #define PCRE_UTF32_ERR2 2 /* Unused (was non-character) */ 253 | #define PCRE_UTF32_ERR3 3 254 | 255 | /* Request types for pcre_fullinfo() */ 256 | 257 | #define PCRE_INFO_OPTIONS 0 258 | #define PCRE_INFO_SIZE 1 259 | #define PCRE_INFO_CAPTURECOUNT 2 260 | #define PCRE_INFO_BACKREFMAX 3 261 | #define PCRE_INFO_FIRSTBYTE 4 262 | #define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */ 263 | #define PCRE_INFO_FIRSTTABLE 5 264 | #define PCRE_INFO_LASTLITERAL 6 265 | #define PCRE_INFO_NAMEENTRYSIZE 7 266 | #define PCRE_INFO_NAMECOUNT 8 267 | #define PCRE_INFO_NAMETABLE 9 268 | #define PCRE_INFO_STUDYSIZE 10 269 | #define PCRE_INFO_DEFAULT_TABLES 11 270 | #define PCRE_INFO_OKPARTIAL 12 271 | #define PCRE_INFO_JCHANGED 13 272 | #define PCRE_INFO_HASCRORLF 14 273 | #define PCRE_INFO_MINLENGTH 15 274 | #define PCRE_INFO_JIT 16 275 | #define PCRE_INFO_JITSIZE 17 276 | #define PCRE_INFO_MAXLOOKBEHIND 18 277 | #define PCRE_INFO_FIRSTCHARACTER 19 278 | #define PCRE_INFO_FIRSTCHARACTERFLAGS 20 279 | #define PCRE_INFO_REQUIREDCHAR 21 280 | #define PCRE_INFO_REQUIREDCHARFLAGS 22 281 | #define PCRE_INFO_MATCHLIMIT 23 282 | #define PCRE_INFO_RECURSIONLIMIT 24 283 | #define PCRE_INFO_MATCH_EMPTY 25 284 | 285 | /* Request types for pcre_config(). Do not re-arrange, in order to remain 286 | compatible. 
*/ 287 | 288 | #define PCRE_CONFIG_UTF8 0 289 | #define PCRE_CONFIG_NEWLINE 1 290 | #define PCRE_CONFIG_LINK_SIZE 2 291 | #define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3 292 | #define PCRE_CONFIG_MATCH_LIMIT 4 293 | #define PCRE_CONFIG_STACKRECURSE 5 294 | #define PCRE_CONFIG_UNICODE_PROPERTIES 6 295 | #define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7 296 | #define PCRE_CONFIG_BSR 8 297 | #define PCRE_CONFIG_JIT 9 298 | #define PCRE_CONFIG_UTF16 10 299 | #define PCRE_CONFIG_JITTARGET 11 300 | #define PCRE_CONFIG_UTF32 12 301 | #define PCRE_CONFIG_PARENS_LIMIT 13 302 | 303 | /* Request types for pcre_study(). Do not re-arrange, in order to remain 304 | compatible. */ 305 | 306 | #define PCRE_STUDY_JIT_COMPILE 0x0001 307 | #define PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE 0x0002 308 | #define PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE 0x0004 309 | #define PCRE_STUDY_EXTRA_NEEDED 0x0008 310 | 311 | /* Bit flags for the pcre[16|32]_extra structure. Do not re-arrange or redefine 312 | these bits, just add new ones on the end, in order to remain compatible. 
*/ 313 | 314 | #define PCRE_EXTRA_STUDY_DATA 0x0001 315 | #define PCRE_EXTRA_MATCH_LIMIT 0x0002 316 | #define PCRE_EXTRA_CALLOUT_DATA 0x0004 317 | #define PCRE_EXTRA_TABLES 0x0008 318 | #define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010 319 | #define PCRE_EXTRA_MARK 0x0020 320 | #define PCRE_EXTRA_EXECUTABLE_JIT 0x0040 321 | 322 | /* Types */ 323 | 324 | struct real_pcre; /* declaration; the definition is private */ 325 | typedef struct real_pcre pcre; 326 | 327 | struct real_pcre16; /* declaration; the definition is private */ 328 | typedef struct real_pcre16 pcre16; 329 | 330 | struct real_pcre32; /* declaration; the definition is private */ 331 | typedef struct real_pcre32 pcre32; 332 | 333 | struct real_pcre_jit_stack; /* declaration; the definition is private */ 334 | typedef struct real_pcre_jit_stack pcre_jit_stack; 335 | 336 | struct real_pcre16_jit_stack; /* declaration; the definition is private */ 337 | typedef struct real_pcre16_jit_stack pcre16_jit_stack; 338 | 339 | struct real_pcre32_jit_stack; /* declaration; the definition is private */ 340 | typedef struct real_pcre32_jit_stack pcre32_jit_stack; 341 | 342 | /* If PCRE is compiled with 16 bit character support, PCRE_UCHAR16 must contain 343 | a 16 bit wide signed data type. Otherwise it can be a dummy data type since 344 | pcre16 functions are not implemented. There is a check for this in pcre_internal.h. */ 345 | #ifndef PCRE_UCHAR16 346 | #define PCRE_UCHAR16 unsigned short 347 | #endif 348 | 349 | #ifndef PCRE_SPTR16 350 | #define PCRE_SPTR16 const PCRE_UCHAR16 * 351 | #endif 352 | 353 | /* If PCRE is compiled with 32 bit character support, PCRE_UCHAR32 must contain 354 | a 32 bit wide signed data type. Otherwise it can be a dummy data type since 355 | pcre32 functions are not implemented. There is a check for this in pcre_internal.h. 
*/ 356 | #ifndef PCRE_UCHAR32 357 | #define PCRE_UCHAR32 unsigned int 358 | #endif 359 | 360 | #ifndef PCRE_SPTR32 361 | #define PCRE_SPTR32 const PCRE_UCHAR32 * 362 | #endif 363 | 364 | /* When PCRE is compiled as a C++ library, the subject pointer type can be 365 | replaced with a custom type. For conventional use, the public interface is a 366 | const char *. */ 367 | 368 | #ifndef PCRE_SPTR 369 | #define PCRE_SPTR const char * 370 | #endif 371 | 372 | /* The structure for passing additional data to pcre_exec(). This is defined in 373 | such as way as to be extensible. Always add new fields at the end, in order to 374 | remain compatible. */ 375 | 376 | typedef struct pcre_extra { 377 | unsigned long int flags; /* Bits for which fields are set */ 378 | void *study_data; /* Opaque data from pcre_study() */ 379 | unsigned long int match_limit; /* Maximum number of calls to match() */ 380 | void *callout_data; /* Data passed back in callouts */ 381 | const unsigned char *tables; /* Pointer to character tables */ 382 | unsigned long int match_limit_recursion; /* Max recursive calls to match() */ 383 | unsigned char **mark; /* For passing back a mark pointer */ 384 | void *executable_jit; /* Contains a pointer to a compiled jit code */ 385 | } pcre_extra; 386 | 387 | /* Same structure as above, but with 16 bit char pointers. 
*/ 388 | 389 | typedef struct pcre16_extra { 390 | unsigned long int flags; /* Bits for which fields are set */ 391 | void *study_data; /* Opaque data from pcre_study() */ 392 | unsigned long int match_limit; /* Maximum number of calls to match() */ 393 | void *callout_data; /* Data passed back in callouts */ 394 | const unsigned char *tables; /* Pointer to character tables */ 395 | unsigned long int match_limit_recursion; /* Max recursive calls to match() */ 396 | PCRE_UCHAR16 **mark; /* For passing back a mark pointer */ 397 | void *executable_jit; /* Contains a pointer to a compiled jit code */ 398 | } pcre16_extra; 399 | 400 | /* Same structure as above, but with 32 bit char pointers. */ 401 | 402 | typedef struct pcre32_extra { 403 | unsigned long int flags; /* Bits for which fields are set */ 404 | void *study_data; /* Opaque data from pcre_study() */ 405 | unsigned long int match_limit; /* Maximum number of calls to match() */ 406 | void *callout_data; /* Data passed back in callouts */ 407 | const unsigned char *tables; /* Pointer to character tables */ 408 | unsigned long int match_limit_recursion; /* Max recursive calls to match() */ 409 | PCRE_UCHAR32 **mark; /* For passing back a mark pointer */ 410 | void *executable_jit; /* Contains a pointer to a compiled jit code */ 411 | } pcre32_extra; 412 | 413 | /* The structure for passing out data via the pcre_callout_function. We use a 414 | structure so that new fields can be added on the end in future versions, 415 | without changing the API of the function, thereby allowing old clients to work 416 | without modification. 
*/ 417 | 418 | typedef struct pcre_callout_block { 419 | int version; /* Identifies version of block */ 420 | /* ------------------------ Version 0 ------------------------------- */ 421 | int callout_number; /* Number compiled into pattern */ 422 | int *offset_vector; /* The offset vector */ 423 | PCRE_SPTR subject; /* The subject being matched */ 424 | int subject_length; /* The length of the subject */ 425 | int start_match; /* Offset to start of this match attempt */ 426 | int current_position; /* Where we currently are in the subject */ 427 | int capture_top; /* Max current capture */ 428 | int capture_last; /* Most recently closed capture */ 429 | void *callout_data; /* Data passed in with the call */ 430 | /* ------------------- Added for Version 1 -------------------------- */ 431 | int pattern_position; /* Offset to next item in the pattern */ 432 | int next_item_length; /* Length of next item in the pattern */ 433 | /* ------------------- Added for Version 2 -------------------------- */ 434 | const unsigned char *mark; /* Pointer to current mark or NULL */ 435 | /* ------------------------------------------------------------------ */ 436 | } pcre_callout_block; 437 | 438 | /* Same structure as above, but with 16 bit char pointers. 
*/ 439 | 440 | typedef struct pcre16_callout_block { 441 | int version; /* Identifies version of block */ 442 | /* ------------------------ Version 0 ------------------------------- */ 443 | int callout_number; /* Number compiled into pattern */ 444 | int *offset_vector; /* The offset vector */ 445 | PCRE_SPTR16 subject; /* The subject being matched */ 446 | int subject_length; /* The length of the subject */ 447 | int start_match; /* Offset to start of this match attempt */ 448 | int current_position; /* Where we currently are in the subject */ 449 | int capture_top; /* Max current capture */ 450 | int capture_last; /* Most recently closed capture */ 451 | void *callout_data; /* Data passed in with the call */ 452 | /* ------------------- Added for Version 1 -------------------------- */ 453 | int pattern_position; /* Offset to next item in the pattern */ 454 | int next_item_length; /* Length of next item in the pattern */ 455 | /* ------------------- Added for Version 2 -------------------------- */ 456 | const PCRE_UCHAR16 *mark; /* Pointer to current mark or NULL */ 457 | /* ------------------------------------------------------------------ */ 458 | } pcre16_callout_block; 459 | 460 | /* Same structure as above, but with 32 bit char pointers. 
*/ 461 | 462 | typedef struct pcre32_callout_block { 463 | int version; /* Identifies version of block */ 464 | /* ------------------------ Version 0 ------------------------------- */ 465 | int callout_number; /* Number compiled into pattern */ 466 | int *offset_vector; /* The offset vector */ 467 | PCRE_SPTR32 subject; /* The subject being matched */ 468 | int subject_length; /* The length of the subject */ 469 | int start_match; /* Offset to start of this match attempt */ 470 | int current_position; /* Where we currently are in the subject */ 471 | int capture_top; /* Max current capture */ 472 | int capture_last; /* Most recently closed capture */ 473 | void *callout_data; /* Data passed in with the call */ 474 | /* ------------------- Added for Version 1 -------------------------- */ 475 | int pattern_position; /* Offset to next item in the pattern */ 476 | int next_item_length; /* Length of next item in the pattern */ 477 | /* ------------------- Added for Version 2 -------------------------- */ 478 | const PCRE_UCHAR32 *mark; /* Pointer to current mark or NULL */ 479 | /* ------------------------------------------------------------------ */ 480 | } pcre32_callout_block; 481 | 482 | /* Indirection for store get and free functions. These can be set to 483 | alternative malloc/free functions if required. Special ones are used in the 484 | non-recursive case for "frames". There is also an optional callout function 485 | that is triggered by the (?) regex item. For Virtual Pascal, these definitions 486 | have to take another form. 
*/ 487 | 488 | #ifndef VPCOMPAT 489 | PCRE_EXP_DECL void *(*pcre_malloc)(size_t); 490 | PCRE_EXP_DECL void (*pcre_free)(void *); 491 | PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t); 492 | PCRE_EXP_DECL void (*pcre_stack_free)(void *); 493 | PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *); 494 | PCRE_EXP_DECL int (*pcre_stack_guard)(void); 495 | 496 | PCRE_EXP_DECL void *(*pcre16_malloc)(size_t); 497 | PCRE_EXP_DECL void (*pcre16_free)(void *); 498 | PCRE_EXP_DECL void *(*pcre16_stack_malloc)(size_t); 499 | PCRE_EXP_DECL void (*pcre16_stack_free)(void *); 500 | PCRE_EXP_DECL int (*pcre16_callout)(pcre16_callout_block *); 501 | PCRE_EXP_DECL int (*pcre16_stack_guard)(void); 502 | 503 | PCRE_EXP_DECL void *(*pcre32_malloc)(size_t); 504 | PCRE_EXP_DECL void (*pcre32_free)(void *); 505 | PCRE_EXP_DECL void *(*pcre32_stack_malloc)(size_t); 506 | PCRE_EXP_DECL void (*pcre32_stack_free)(void *); 507 | PCRE_EXP_DECL int (*pcre32_callout)(pcre32_callout_block *); 508 | PCRE_EXP_DECL int (*pcre32_stack_guard)(void); 509 | #else /* VPCOMPAT */ 510 | PCRE_EXP_DECL void *pcre_malloc(size_t); 511 | PCRE_EXP_DECL void pcre_free(void *); 512 | PCRE_EXP_DECL void *pcre_stack_malloc(size_t); 513 | PCRE_EXP_DECL void pcre_stack_free(void *); 514 | PCRE_EXP_DECL int pcre_callout(pcre_callout_block *); 515 | PCRE_EXP_DECL int pcre_stack_guard(void); 516 | 517 | PCRE_EXP_DECL void *pcre16_malloc(size_t); 518 | PCRE_EXP_DECL void pcre16_free(void *); 519 | PCRE_EXP_DECL void *pcre16_stack_malloc(size_t); 520 | PCRE_EXP_DECL void pcre16_stack_free(void *); 521 | PCRE_EXP_DECL int pcre16_callout(pcre16_callout_block *); 522 | PCRE_EXP_DECL int pcre16_stack_guard(void); 523 | 524 | PCRE_EXP_DECL void *pcre32_malloc(size_t); 525 | PCRE_EXP_DECL void pcre32_free(void *); 526 | PCRE_EXP_DECL void *pcre32_stack_malloc(size_t); 527 | PCRE_EXP_DECL void pcre32_stack_free(void *); 528 | PCRE_EXP_DECL int pcre32_callout(pcre32_callout_block *); 529 | PCRE_EXP_DECL int 
pcre32_stack_guard(void); 530 | #endif /* VPCOMPAT */ 531 | 532 | /* User defined callback which provides a stack just before the match starts. */ 533 | 534 | typedef pcre_jit_stack *(*pcre_jit_callback)(void *); 535 | typedef pcre16_jit_stack *(*pcre16_jit_callback)(void *); 536 | typedef pcre32_jit_stack *(*pcre32_jit_callback)(void *); 537 | 538 | /* Exported PCRE functions */ 539 | 540 | PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *, 541 | const unsigned char *); 542 | PCRE_EXP_DECL pcre16 *pcre16_compile(PCRE_SPTR16, int, const char **, int *, 543 | const unsigned char *); 544 | PCRE_EXP_DECL pcre32 *pcre32_compile(PCRE_SPTR32, int, const char **, int *, 545 | const unsigned char *); 546 | PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **, 547 | int *, const unsigned char *); 548 | PCRE_EXP_DECL pcre16 *pcre16_compile2(PCRE_SPTR16, int, int *, const char **, 549 | int *, const unsigned char *); 550 | PCRE_EXP_DECL pcre32 *pcre32_compile2(PCRE_SPTR32, int, int *, const char **, 551 | int *, const unsigned char *); 552 | PCRE_EXP_DECL int pcre_config(int, void *); 553 | PCRE_EXP_DECL int pcre16_config(int, void *); 554 | PCRE_EXP_DECL int pcre32_config(int, void *); 555 | PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *, 556 | int *, int, const char *, char *, int); 557 | PCRE_EXP_DECL int pcre16_copy_named_substring(const pcre16 *, PCRE_SPTR16, 558 | int *, int, PCRE_SPTR16, PCRE_UCHAR16 *, int); 559 | PCRE_EXP_DECL int pcre32_copy_named_substring(const pcre32 *, PCRE_SPTR32, 560 | int *, int, PCRE_SPTR32, PCRE_UCHAR32 *, int); 561 | PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int, 562 | char *, int); 563 | PCRE_EXP_DECL int pcre16_copy_substring(PCRE_SPTR16, int *, int, int, 564 | PCRE_UCHAR16 *, int); 565 | PCRE_EXP_DECL int pcre32_copy_substring(PCRE_SPTR32, int *, int, int, 566 | PCRE_UCHAR32 *, int); 567 | PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const 
pcre_extra *, 568 | const char *, int, int, int, int *, int , int *, int); 569 | PCRE_EXP_DECL int pcre16_dfa_exec(const pcre16 *, const pcre16_extra *, 570 | PCRE_SPTR16, int, int, int, int *, int , int *, int); 571 | PCRE_EXP_DECL int pcre32_dfa_exec(const pcre32 *, const pcre32_extra *, 572 | PCRE_SPTR32, int, int, int, int *, int , int *, int); 573 | PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR, 574 | int, int, int, int *, int); 575 | PCRE_EXP_DECL int pcre16_exec(const pcre16 *, const pcre16_extra *, 576 | PCRE_SPTR16, int, int, int, int *, int); 577 | PCRE_EXP_DECL int pcre32_exec(const pcre32 *, const pcre32_extra *, 578 | PCRE_SPTR32, int, int, int, int *, int); 579 | PCRE_EXP_DECL int pcre_jit_exec(const pcre *, const pcre_extra *, 580 | PCRE_SPTR, int, int, int, int *, int, 581 | pcre_jit_stack *); 582 | PCRE_EXP_DECL int pcre16_jit_exec(const pcre16 *, const pcre16_extra *, 583 | PCRE_SPTR16, int, int, int, int *, int, 584 | pcre16_jit_stack *); 585 | PCRE_EXP_DECL int pcre32_jit_exec(const pcre32 *, const pcre32_extra *, 586 | PCRE_SPTR32, int, int, int, int *, int, 587 | pcre32_jit_stack *); 588 | PCRE_EXP_DECL void pcre_free_substring(const char *); 589 | PCRE_EXP_DECL void pcre16_free_substring(PCRE_SPTR16); 590 | PCRE_EXP_DECL void pcre32_free_substring(PCRE_SPTR32); 591 | PCRE_EXP_DECL void pcre_free_substring_list(const char **); 592 | PCRE_EXP_DECL void pcre16_free_substring_list(PCRE_SPTR16 *); 593 | PCRE_EXP_DECL void pcre32_free_substring_list(PCRE_SPTR32 *); 594 | PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int, 595 | void *); 596 | PCRE_EXP_DECL int pcre16_fullinfo(const pcre16 *, const pcre16_extra *, int, 597 | void *); 598 | PCRE_EXP_DECL int pcre32_fullinfo(const pcre32 *, const pcre32_extra *, int, 599 | void *); 600 | PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *, 601 | int *, int, const char *, const char **); 602 | PCRE_EXP_DECL int 
pcre16_get_named_substring(const pcre16 *, PCRE_SPTR16, 603 | int *, int, PCRE_SPTR16, PCRE_SPTR16 *); 604 | PCRE_EXP_DECL int pcre32_get_named_substring(const pcre32 *, PCRE_SPTR32, 605 | int *, int, PCRE_SPTR32, PCRE_SPTR32 *); 606 | PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *); 607 | PCRE_EXP_DECL int pcre16_get_stringnumber(const pcre16 *, PCRE_SPTR16); 608 | PCRE_EXP_DECL int pcre32_get_stringnumber(const pcre32 *, PCRE_SPTR32); 609 | PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *, 610 | char **, char **); 611 | PCRE_EXP_DECL int pcre16_get_stringtable_entries(const pcre16 *, PCRE_SPTR16, 612 | PCRE_UCHAR16 **, PCRE_UCHAR16 **); 613 | PCRE_EXP_DECL int pcre32_get_stringtable_entries(const pcre32 *, PCRE_SPTR32, 614 | PCRE_UCHAR32 **, PCRE_UCHAR32 **); 615 | PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int, 616 | const char **); 617 | PCRE_EXP_DECL int pcre16_get_substring(PCRE_SPTR16, int *, int, int, 618 | PCRE_SPTR16 *); 619 | PCRE_EXP_DECL int pcre32_get_substring(PCRE_SPTR32, int *, int, int, 620 | PCRE_SPTR32 *); 621 | PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int, 622 | const char ***); 623 | PCRE_EXP_DECL int pcre16_get_substring_list(PCRE_SPTR16, int *, int, 624 | PCRE_SPTR16 **); 625 | PCRE_EXP_DECL int pcre32_get_substring_list(PCRE_SPTR32, int *, int, 626 | PCRE_SPTR32 **); 627 | PCRE_EXP_DECL const unsigned char *pcre_maketables(void); 628 | PCRE_EXP_DECL const unsigned char *pcre16_maketables(void); 629 | PCRE_EXP_DECL const unsigned char *pcre32_maketables(void); 630 | PCRE_EXP_DECL int pcre_refcount(pcre *, int); 631 | PCRE_EXP_DECL int pcre16_refcount(pcre16 *, int); 632 | PCRE_EXP_DECL int pcre32_refcount(pcre32 *, int); 633 | PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **); 634 | PCRE_EXP_DECL pcre16_extra *pcre16_study(const pcre16 *, int, const char **); 635 | PCRE_EXP_DECL pcre32_extra *pcre32_study(const pcre32 *, int, const 
char **); 636 | PCRE_EXP_DECL void pcre_free_study(pcre_extra *); 637 | PCRE_EXP_DECL void pcre16_free_study(pcre16_extra *); 638 | PCRE_EXP_DECL void pcre32_free_study(pcre32_extra *); 639 | PCRE_EXP_DECL const char *pcre_version(void); 640 | PCRE_EXP_DECL const char *pcre16_version(void); 641 | PCRE_EXP_DECL const char *pcre32_version(void); 642 | 643 | /* Utility functions for byte order swaps. */ 644 | PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *, pcre_extra *, 645 | const unsigned char *); 646 | PCRE_EXP_DECL int pcre16_pattern_to_host_byte_order(pcre16 *, pcre16_extra *, 647 | const unsigned char *); 648 | PCRE_EXP_DECL int pcre32_pattern_to_host_byte_order(pcre32 *, pcre32_extra *, 649 | const unsigned char *); 650 | PCRE_EXP_DECL int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *, 651 | PCRE_SPTR16, int, int *, int); 652 | PCRE_EXP_DECL int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *, 653 | PCRE_SPTR32, int, int *, int); 654 | 655 | /* JIT compiler related functions. 
*/ 656 | 657 | PCRE_EXP_DECL pcre_jit_stack *pcre_jit_stack_alloc(int, int); 658 | PCRE_EXP_DECL pcre16_jit_stack *pcre16_jit_stack_alloc(int, int); 659 | PCRE_EXP_DECL pcre32_jit_stack *pcre32_jit_stack_alloc(int, int); 660 | PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *); 661 | PCRE_EXP_DECL void pcre16_jit_stack_free(pcre16_jit_stack *); 662 | PCRE_EXP_DECL void pcre32_jit_stack_free(pcre32_jit_stack *); 663 | PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *, 664 | pcre_jit_callback, void *); 665 | PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre16_extra *, 666 | pcre16_jit_callback, void *); 667 | PCRE_EXP_DECL void pcre32_assign_jit_stack(pcre32_extra *, 668 | pcre32_jit_callback, void *); 669 | PCRE_EXP_DECL void pcre_jit_free_unused_memory(void); 670 | PCRE_EXP_DECL void pcre16_jit_free_unused_memory(void); 671 | PCRE_EXP_DECL void pcre32_jit_free_unused_memory(void); 672 | 673 | #ifdef __cplusplus 674 | } /* extern "C" */ 675 | #endif 676 | 677 | #endif /* End of pcre.h */ 678 | -------------------------------------------------------------------------------- /pcre_fallback.h: -------------------------------------------------------------------------------- 1 | #ifndef PCRE_NEVER_UTF 2 | #define PCRE_NEVER_UTF 0x0 3 | #endif 4 | -------------------------------------------------------------------------------- /pcre_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2011 Florian Weimer 2 | 3 | package pcre 4 | 5 | import ( 6 | "reflect" 7 | "testing" 8 | ) 9 | 10 | func TestCompile(t *testing.T) { 11 | var check = func(p string, groups int) { 12 | re, err := Compile(p, 0) 13 | if err != nil { 14 | t.Error(p, err) 15 | } 16 | if g := re.Groups(); g != groups { 17 | t.Error(p, g) 18 | } 19 | } 20 | check("", 0) 21 | check("^", 0) 22 | check("^$", 0) 23 | check("()", 1) 24 | check("(())", 2) 25 | check("((?:))", 1) 26 | } 27 | 28 | func TestCompileFail(t *testing.T) { 29 | var check = 
func(p, msg string, off int) { 30 | _, err := Compile(p, 0) 31 | if err == nil { 32 | t.Error(p) 33 | } else { 34 | cerr := err.(*CompileError) 35 | switch { 36 | case cerr.Message != msg: 37 | t.Error(p, "Message", cerr.Message) 38 | case cerr.Offset != off: 39 | t.Error(p, "Offset", cerr.Offset) 40 | } 41 | } 42 | } 43 | check("(", "missing )", 1) 44 | check("\\", "\\ at end of pattern", 1) 45 | check("abc\\", "\\ at end of pattern", 4) 46 | check("abc\000", "NUL byte in pattern", 3) 47 | check("a\000bc", "NUL byte in pattern", 1) 48 | } 49 | 50 | func strings(b [][]byte) (r []string) { 51 | r = make([]string, len(b)) 52 | for i, v := range b { 53 | r[i] = string(v) 54 | } 55 | return 56 | } 57 | 58 | func equal(l, r []string) bool { 59 | if len(l) != len(r) { 60 | return false 61 | } 62 | for i, lv := range l { 63 | if lv != r[i] { 64 | return false 65 | } 66 | } 67 | return true 68 | } 69 | 70 | func checkmatch1(t *testing.T, dostring bool, m *Matcher, 71 | pattern, subject string, args ...interface{}) { 72 | re := MustCompile(pattern, 0) 73 | var prefix string 74 | if dostring { 75 | if m == nil { 76 | m = re.MatcherString(subject, 0) 77 | } else { 78 | m.ResetString(re, subject, 0) 79 | } 80 | prefix = "string" 81 | } else { 82 | if m == nil { 83 | m = re.Matcher([]byte(subject), 0) 84 | } else { 85 | m.Reset(re, []byte(subject), 0) 86 | } 87 | prefix = "[]byte" 88 | } 89 | if len(args) == 0 { 90 | if m.Matches() { 91 | t.Error(prefix, pattern, subject, "!Matches") 92 | } 93 | } else { 94 | if !m.Matches() { 95 | t.Error(prefix, pattern, subject, "Matches") 96 | return 97 | } 98 | if m.Groups() != len(args)-1 { 99 | t.Error(prefix, pattern, subject, "Groups", m.Groups()) 100 | return 101 | } 102 | for i, arg := range args { 103 | if s, ok := arg.(string); ok { 104 | if !m.Present(i) { 105 | t.Error(prefix, pattern, subject, 106 | "Present", i) 107 | 108 | } 109 | if g := string(m.Group(i)); g != s { 110 | t.Error(prefix, pattern, subject, 111 | "Group", i, g, 
"!=", s) 112 | } 113 | if g := m.GroupString(i); g != s { 114 | t.Error(prefix, pattern, subject, 115 | "GroupString", i, g, "!=", s) 116 | } 117 | } else { 118 | if m.Present(i) { 119 | t.Error(prefix, pattern, subject, 120 | "!Present", i) 121 | } 122 | } 123 | } 124 | } 125 | } 126 | 127 | func TestMatcher(t *testing.T) { 128 | var m Matcher 129 | check := func(pattern, subject string, args ...interface{}) { 130 | checkmatch1(t, false, nil, pattern, subject, args...) 131 | checkmatch1(t, true, nil, pattern, subject, args...) 132 | checkmatch1(t, false, &m, pattern, subject, args...) 133 | checkmatch1(t, true, &m, pattern, subject, args...) 134 | } 135 | 136 | check(`^$`, "", "") 137 | check(`^abc$`, "abc", "abc") 138 | check(`^(X)*ab(c)$`, "abc", "abc", nil, "c") 139 | check(`^(X)*ab()c$`, "abc", "abc", nil, "") 140 | check(`^.*$`, "abc", "abc") 141 | check(`^.*$`, "a\000c", "a\000c") 142 | check(`^(.*)$`, "a\000c", "a\000c", "a\000c") 143 | check(`def`, "abcdefghi", "def") 144 | } 145 | 146 | func TestPartial(t *testing.T) { 147 | re := MustCompile(`^abc`, 0) 148 | defer re.FreeRegexp() 149 | // Check we get a partial match when we should 150 | m := re.MatcherString("ab", PARTIAL_SOFT) 151 | if !m.Matches() { 152 | t.Error("Failed to find any matches") 153 | } else if !m.Partial() { 154 | t.Error("The match was not partial") 155 | } 156 | 157 | // Check we get an exact match when we should 158 | m = re.MatcherString("abc", PARTIAL_SOFT) 159 | if !m.Matches() { 160 | t.Error("Failed to find any matches") 161 | } else if m.Partial() { 162 | t.Error("Match was partial but should have been exact") 163 | } 164 | 165 | m = re.Matcher([]byte("ab"), PARTIAL_SOFT) 166 | if !m.Matches() { 167 | t.Error("Failed to find any matches") 168 | } else if !m.Partial() { 169 | t.Error("The match was not partial") 170 | } 171 | 172 | m = re.Matcher([]byte("abc"), PARTIAL_SOFT) 173 | if !m.Matches() { 174 | t.Error("Failed to find any matches") 175 | } else if m.Partial() { 176 | 
t.Error("Match was partial but should have been exact") 177 | } 178 | } 179 | 180 | func TestCaseless(t *testing.T) { 181 | re := MustCompile("abc", CASELESS) 182 | defer re.FreeRegexp() 183 | m := re.MatcherString("...Abc...", 0) 184 | if !m.Matches() { 185 | t.Error("CASELESS") 186 | } 187 | re2 := MustCompile("abc", 0) 188 | defer re2.FreeRegexp() 189 | m = re2.MatcherString("Abc", 0) 190 | if m.Matches() { 191 | t.Error("!CASELESS") 192 | } 193 | } 194 | 195 | func TestNamed(t *testing.T) { 196 | pattern := "(?<L>a)(?<M>X)*bc(?<DIGITS>\\d*)" 197 | re := MustCompile(pattern, 0) 198 | defer re.FreeRegexp() 199 | m := re.MatcherString("abc12", 0) 200 | if !m.Matches() { 201 | t.Error("Matches") 202 | } 203 | if ok, err := m.NamedPresent("L"); !ok || err != nil { 204 | t.Errorf("NamedPresent(\"L\"): %v", err) 205 | } 206 | if ok, err := m.NamedPresent("M"); ok || err != nil { 207 | t.Errorf("NamedPresent(\"M\"): %v", err) 208 | } 209 | if ok, err := m.NamedPresent("DIGITS"); !ok || err != nil { 210 | t.Errorf("NamedPresent(\"DIGITS\"): %v", err) 211 | } 212 | if str, err := m.NamedString("DIGITS"); str != "12" || err != nil { 213 | t.Errorf("NamedString(\"DIGITS\"): %v", err) 214 | } 215 | } 216 | 217 | func TestMatcherIndex(t *testing.T) { 218 | re := MustCompile("bcd", 0) 219 | defer re.FreeRegexp() 220 | m := re.Matcher([]byte("abcdef"), 0) 221 | i := m.Index() 222 | if i[0] != 1 { 223 | t.Error("FindIndex start", i[0]) 224 | } 225 | if i[1] != 4 { 226 | t.Error("FindIndex end", i[1]) 227 | } 228 | re2 := MustCompile("xyz", 0) 229 | defer re2.FreeRegexp() 230 | m = re2.Matcher([]byte("abcdef"), 0) 231 | i = m.Index() 232 | if i != nil { 233 | t.Error("Index returned for non-match", i) 234 | } 235 | } 236 | 237 | func TestFindIndex(t *testing.T) { 238 | re := MustCompile("bcd", 0) 239 | defer re.FreeRegexp() 240 | i := re.FindIndex([]byte("abcdef"), 0) 241 | if i[0] != 1 { 242 | t.Error("FindIndex start", i[0]) 243 | } 244 | if i[1] != 4 { 245 | t.Error("FindIndex end", i[1]) 
246 | } 247 | } 248 | 249 | func TestExtract(t *testing.T) { 250 | re := MustCompile("b(c)(d)", 0) 251 | defer re.FreeRegexp() 252 | m := re.MatcherString("abcdef", 0) 253 | i := m.ExtractString() 254 | if i[0] != "abcdef" { 255 | t.Error("Full line unavailable: ", i[0]) 256 | } 257 | if i[1] != "c" { 258 | t.Error("First match group no as expected: ", i[1]) 259 | } 260 | if i[2] != "d" { 261 | t.Error("Second match group no as expected: ", i[2]) 262 | } 263 | } 264 | 265 | func TestReplaceAll(t *testing.T) { 266 | re := MustCompile("foo", 0) 267 | var result []byte 268 | var err error 269 | defer re.FreeRegexp() 270 | // Don't change at ends. 271 | if result, err = re.ReplaceAll( 272 | []byte("I like foods."), 273 | []byte("car"), 274 | 0, 275 | ); err != nil { 276 | t.Fatal(err) 277 | } 278 | if string(result) != "I like cards." { 279 | t.Error("ReplaceAll", result) 280 | } 281 | // Change at ends. 282 | if result, err = re.ReplaceAll( 283 | []byte("food fight fools foo"), 284 | []byte("car"), 285 | 0, 286 | ); err != nil { 287 | t.Fatal(err) 288 | } 289 | if string(result) != "card fight carls car" { 290 | t.Error("ReplaceAll2", result) 291 | } 292 | } 293 | 294 | func TestFreeRegexp(t *testing.T) { 295 | re := MustCompileJIT("\\d{3}", 0, STUDY_JIT_COMPILE) 296 | data := []string{"15asd213", "sadi32fjoi"} 297 | expected := []bool{true, false} 298 | for i := 0; i < len(data); i++ { 299 | m := re.MatcherString(data[i], 0) 300 | if m.Matches() != expected[i] { 301 | t.Error("Unexpected match for ", data[i]) 302 | } 303 | } 304 | re.FreeRegexp() 305 | 306 | // Test double free. 
307 | re.FreeRegexp() 308 | } 309 | 310 | func TestFindAll(t *testing.T) { 311 | re := MustCompile("\\d{2}x", 0) 312 | var matches []Match 313 | var err error 314 | defer re.FreeRegexp() 315 | data := "12x 12332xf 43bx62x" 316 | expected := []Match{ 317 | Match{"12x", []int{0, 3}}, 318 | Match{"32x", []int{7, 10}}, 319 | Match{"62x", []int{16, 19}}, 320 | } 321 | if matches, err = re.FindAll(data, 0); err != nil { 322 | t.Fatal(err) 323 | } 324 | verifyMatches(t, expected, matches) 325 | 326 | if matches, err = re.FindAll("", 0); err != nil { 327 | t.Fatal(err) 328 | } 329 | if len(matches) != 0 { 330 | t.Error("Expected no results, got: ", matches) 331 | } 332 | 333 | // Test zero-length matches. 334 | re2 := MustCompile("\\w*", 0) 335 | defer re2.FreeRegexp() 336 | data = "cat dog" 337 | expected = []Match{ 338 | Match{"cat", []int{0, 3}}, 339 | Match{"", []int{3, 3}}, 340 | Match{"dog", []int{4, 7}}, 341 | } 342 | matches, err = re2.FindAll(data, 0) 343 | if err != nil { 344 | t.Fatal(err) 345 | } 346 | verifyMatches(t, expected, matches) 347 | } 348 | // verifyMatches compares matches to expected element by element; a length mismatch is fatal so the loop never indexes past the end of matches. 349 | func verifyMatches(t *testing.T, expected []Match, matches []Match) { 350 | if len(matches) != len(expected) { 351 | t.Fatalf("Expected %d matches, got: %d", len(expected), len(matches)) 352 | } 353 | for i := 0; i < len(expected); i++ { 354 | if !reflect.DeepEqual(matches[i], expected[i]) { 355 | t.Errorf("Expected match: %v, got: %v", expected[i], matches[i]) 356 | } 357 | } 358 | } 359 | -------------------------------------------------------------------------------- /platform_darwin_amd64.go: -------------------------------------------------------------------------------- 1 | package pcre 2 | 3 | // #cgo LDFLAGS: ${SRCDIR}/libpcre_darwin_x86_64.a 4 | import "C" 5 | -------------------------------------------------------------------------------- /platform_darwin_arm64.go: -------------------------------------------------------------------------------- 1 | package pcre 2 | 3 | // #cgo LDFLAGS: 
${SRCDIR}/libpcre_darwin_arm64.a 4 | import "C" 5 | -------------------------------------------------------------------------------- /platform_linux.go: -------------------------------------------------------------------------------- 1 | package pcre 2 | 3 | // #cgo LDFLAGS: ${SRCDIR}/libpcre_linux.a 4 | import "C" 5 | --------------------------------------------------------------------------------