├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── pcre.go ├── pcre_fallback.h └── pcre_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | [0-9].out 3 | *.[0-9] 4 | *.cgo?.* 5 | _cgo_* 6 | _obj 7 | _test 8 | _testmain.go 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | sudo: false 3 | addons: 4 | apt: 5 | packages: 6 | - libpcre++-dev 7 | go: 8 | - 1.3.3 9 | - 1.4.1 10 | - 1.4.2 11 | - 1.4.3 12 | - 1.5.1 13 | script: 14 | - go test -v ./... 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The Go interface for PCRE is licensed according to the terms below. 2 | Note that PCRE and the Go implementation come with different terms. 3 | 4 | Copyright (c) 2011 Florian Weimer. All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are 8 | met: 9 | 10 | * Redistributions of source code must retain the above copyright 11 | notice, this list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in the 15 | documentation and/or other materials provided with the distribution. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # go-pcre 2 | 3 | [![GoDoc](https://godoc.org/github.com/gijsbers/go-pcre?status.svg)](https://godoc.org/github.com/gijsbers/go-pcre) 4 | 5 | This is a Go language package providing support for 6 | Perl Compatible Regular Expressions (PCRE). 7 | 8 | ## Installation 9 | 10 | Install the package for Debian as follows: 11 | 12 | sudo apt-get install libpcre++-dev 13 | go get github.com/gijsbers/go-pcre 14 | 15 | ## Usage 16 | 17 | Go programs that depend on this package should import 18 | this package as follows to allow automatic downloading: 19 | 20 | import "github.com/gijsbers/go-pcre" 21 | 22 | ## History 23 | 24 | This is a clone of 25 | [golang-pkg-pcre](http://git.enyo.de/fw/debian/golang-pkg-pcre.git) 26 | by Florian Weimer, which has been placed on Github by Glenn Brown, 27 | so it can be fetched automatically by Go's package installer. 28 | 29 | Glenn Brown added `FindIndex()` and `ReplaceAll()` 30 | to mimic functions in Go's default regexp package. 31 | 32 | Mathieu Payeur Levallois added `Matcher.ExtractString()`. 33 | 34 | Malte Nuhn added `GroupIndices()` to retrieve positions of a matching group. 35 | 36 | Chandra Sekar S added `Index()` and stopped invoking `Match()` twice in `FindIndex()`. 
37 | 38 | Misakwa added support for `pkg-config` to locate `libpcre`. 39 | 40 | Yann Ramin added `ReplaceAllString()` and changed `Compile()` return type to `error`. 41 | 42 | Nikolay Sivko modified `name2index()` to return error instead of panic. 43 | 44 | Harry Waye exposed raw `pcre_exec`. 45 | 46 | Hazzadous added partial match support. 47 | 48 | Pavel Gryaznov added support for JIT compilation. 49 | -------------------------------------------------------------------------------- /pcre.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 Florian Weimer. All rights reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // 10 | // * Redistributions in binary form must reproduce the above copyright 11 | // notice, this list of conditions and the following disclaimer in the 12 | // documentation and/or other materials provided with the distribution. 13 | // 14 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 15 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 16 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 17 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 18 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 20 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | 26 | // Package pcre provides access to the Perl Compatible Regular 27 | // Expression library, PCRE. 28 | // 29 | // It implements two main types, Regexp and Matcher. Regexp objects 30 | // store a compiled regular expression. They consist of two immutable 31 | // parts: pcre and pcre_extra. Compile()/MustCompile() initialize pcre. 32 | // Calling Study() on a compiled Regexp initializes pcre_extra. 33 | // Compilation of regular expressions using Compile or MustCompile is 34 | // slightly expensive, so these objects should be kept and reused, 35 | // instead of compiling them from scratch for each matching attempt. 36 | // CompileJIT and MustCompileJIT are way more expensive, because they 37 | // run Study() after compiling a Regexp, but they tend to give 38 | // much better performance: 39 | // http://sljit.sourceforge.net/regex_perf.html 40 | // 41 | // Matcher objects keep the results of a match against a []byte or 42 | // string subject. The Group and GroupString functions provide access 43 | // to capture groups; both versions work no matter if the subject was a 44 | // []byte or string, but the version with the matching type is slightly 45 | // more efficient. 46 | // 47 | // Matcher objects contain some temporary space and refer to the original 48 | // subject. They are mutable and can be reused (using Match, 49 | // MatchString, Reset or ResetString).
50 | // 51 | // For details on the regular expression language implemented by this 52 | // package and the flags defined below, see the PCRE documentation. 53 | // http://www.pcre.org/pcre.txt 54 | package pcre 55 | 56 | // #cgo pkg-config: libpcre 57 | // #include 58 | // #include 59 | // #include "./pcre_fallback.h" 60 | import "C" 61 | 62 | import ( 63 | "fmt" 64 | "strconv" 65 | "unsafe" 66 | ) 67 | 68 | // Flags for Compile and Match functions. 69 | const ( 70 | ANCHORED = C.PCRE_ANCHORED 71 | BSR_ANYCRLF = C.PCRE_BSR_ANYCRLF 72 | BSR_UNICODE = C.PCRE_BSR_UNICODE 73 | NEWLINE_ANY = C.PCRE_NEWLINE_ANY 74 | NEWLINE_ANYCRLF = C.PCRE_NEWLINE_ANYCRLF 75 | NEWLINE_CR = C.PCRE_NEWLINE_CR 76 | NEWLINE_CRLF = C.PCRE_NEWLINE_CRLF 77 | NEWLINE_LF = C.PCRE_NEWLINE_LF 78 | NO_START_OPTIMIZE = C.PCRE_NO_START_OPTIMIZE 79 | NO_UTF8_CHECK = C.PCRE_NO_UTF8_CHECK 80 | ) 81 | 82 | // Flags for Compile functions 83 | const ( 84 | CASELESS = C.PCRE_CASELESS 85 | DOLLAR_ENDONLY = C.PCRE_DOLLAR_ENDONLY 86 | DOTALL = C.PCRE_DOTALL 87 | DUPNAMES = C.PCRE_DUPNAMES 88 | EXTENDED = C.PCRE_EXTENDED 89 | EXTRA = C.PCRE_EXTRA 90 | FIRSTLINE = C.PCRE_FIRSTLINE 91 | JAVASCRIPT_COMPAT = C.PCRE_JAVASCRIPT_COMPAT 92 | MULTILINE = C.PCRE_MULTILINE 93 | NEVER_UTF = C.PCRE_NEVER_UTF 94 | NO_AUTO_CAPTURE = C.PCRE_NO_AUTO_CAPTURE 95 | UNGREEDY = C.PCRE_UNGREEDY 96 | UTF8 = C.PCRE_UTF8 97 | UCP = C.PCRE_UCP 98 | ) 99 | 100 | // Flags for Match functions 101 | const ( 102 | NOTBOL = C.PCRE_NOTBOL 103 | NOTEOL = C.PCRE_NOTEOL 104 | NOTEMPTY = C.PCRE_NOTEMPTY 105 | NOTEMPTY_ATSTART = C.PCRE_NOTEMPTY_ATSTART 106 | PARTIAL_HARD = C.PCRE_PARTIAL_HARD 107 | PARTIAL_SOFT = C.PCRE_PARTIAL_SOFT 108 | ) 109 | 110 | // Flags for Study function 111 | const ( 112 | STUDY_JIT_COMPILE = C.PCRE_STUDY_JIT_COMPILE 113 | STUDY_JIT_PARTIAL_SOFT_COMPILE = C.PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE 114 | STUDY_JIT_PARTIAL_HARD_COMPILE = C.PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE 115 | ) 116 | 117 | // Exec-time and get/set-time error 
codes 118 | const ( 119 | ERROR_NOMATCH = C.PCRE_ERROR_NOMATCH 120 | ERROR_NULL = C.PCRE_ERROR_NULL 121 | ERROR_BADOPTION = C.PCRE_ERROR_BADOPTION 122 | ERROR_BADMAGIC = C.PCRE_ERROR_BADMAGIC 123 | ERROR_UNKNOWN_OPCODE = C.PCRE_ERROR_UNKNOWN_OPCODE 124 | ERROR_UNKNOWN_NODE = C.PCRE_ERROR_UNKNOWN_NODE 125 | ERROR_NOMEMORY = C.PCRE_ERROR_NOMEMORY 126 | ERROR_NOSUBSTRING = C.PCRE_ERROR_NOSUBSTRING 127 | ERROR_MATCHLIMIT = C.PCRE_ERROR_MATCHLIMIT 128 | ERROR_CALLOUT = C.PCRE_ERROR_CALLOUT 129 | ERROR_BADUTF8 = C.PCRE_ERROR_BADUTF8 130 | ERROR_BADUTF8_OFFSET = C.PCRE_ERROR_BADUTF8_OFFSET 131 | ERROR_PARTIAL = C.PCRE_ERROR_PARTIAL 132 | ERROR_BADPARTIAL = C.PCRE_ERROR_BADPARTIAL 133 | ERROR_RECURSIONLIMIT = C.PCRE_ERROR_RECURSIONLIMIT 134 | ERROR_INTERNAL = C.PCRE_ERROR_INTERNAL 135 | ERROR_BADCOUNT = C.PCRE_ERROR_BADCOUNT 136 | ERROR_JIT_STACKLIMIT = C.PCRE_ERROR_JIT_STACKLIMIT 137 | ) 138 | 139 | // Regexp holds a reference to a compiled regular expression. 140 | // Use Compile or MustCompile to create such objects. 141 | type Regexp struct { 142 | ptr []byte 143 | extra []byte 144 | } 145 | 146 | // Number of bytes in the compiled pattern 147 | func pcreSize(ptr *C.pcre) (size C.size_t) { 148 | C.pcre_fullinfo(ptr, nil, C.PCRE_INFO_SIZE, unsafe.Pointer(&size)) 149 | return 150 | } 151 | 152 | // Number of capture groups 153 | func pcreGroups(ptr *C.pcre) (count C.int) { 154 | C.pcre_fullinfo(ptr, nil, 155 | C.PCRE_INFO_CAPTURECOUNT, unsafe.Pointer(&count)) 156 | return 157 | } 158 | 159 | // Move pattern to the Go heap so that we do not have to use a 160 | // finalizer. PCRE patterns are fully relocatable. (We do not use 161 | // custom character tables.) 162 | func toHeap(ptr *C.pcre) (re Regexp) { 163 | defer C.free(unsafe.Pointer(ptr)) 164 | size := pcreSize(ptr) 165 | re.ptr = make([]byte, size) 166 | C.memcpy(unsafe.Pointer(&re.ptr[0]), unsafe.Pointer(ptr), size) 167 | return 168 | } 169 | 170 | // Compile the pattern and return a compiled regexp. 
171 | // If compilation fails, the second return value holds a *CompileError. 172 | func Compile(pattern string, flags int) (Regexp, error) { 173 | pattern1 := C.CString(pattern) 174 | defer C.free(unsafe.Pointer(pattern1)) 175 | if clen := int(C.strlen(pattern1)); clen != len(pattern) { 176 | return Regexp{}, &CompileError{ 177 | Pattern: pattern, 178 | Message: "NUL byte in pattern", 179 | Offset: clen, 180 | } 181 | } 182 | var errptr *C.char 183 | var erroffset C.int 184 | ptr := C.pcre_compile(pattern1, C.int(flags), &errptr, &erroffset, nil) 185 | if ptr == nil { 186 | return Regexp{}, &CompileError{ 187 | Pattern: pattern, 188 | Message: C.GoString(errptr), 189 | Offset: int(erroffset), 190 | } 191 | } 192 | heap := toHeap(ptr) 193 | return heap, nil 194 | } 195 | 196 | // CompileJIT is a combination of Compile and Study. It first compiles 197 | // the pattern and if this succeeds calls Study on the compiled pattern. 198 | // comFlags are Compile flags, jitFlags are study flags. 199 | // If compilation fails, the second return value holds a *CompileError. 200 | func CompileJIT(pattern string, comFlags, jitFlags int) (Regexp, error) { 201 | re, err := Compile(pattern, comFlags) 202 | if err == nil { 203 | err = (&re).Study(jitFlags) 204 | } 205 | return re, err 206 | } 207 | 208 | // MustCompile compiles the pattern. If compilation fails, panic. 209 | func MustCompile(pattern string, flags int) (re Regexp) { 210 | re, err := Compile(pattern, flags) 211 | if err != nil { 212 | panic(err) 213 | } 214 | return 215 | } 216 | 217 | // MustCompileJIT compiles and studies the pattern. On failure it panics. 218 | func MustCompileJIT(pattern string, comFlags, jitFlags int) (re Regexp) { 219 | re, err := CompileJIT(pattern, comFlags, jitFlags) 220 | if err != nil { 221 | panic(err) 222 | } 223 | return 224 | } 225 | 226 | // Study adds Just-In-Time compilation to a Regexp. This may give a huge 227 | // speed boost when matching. 
If an error occurs, return value is non-nil. 228 | // Flags optionally specifies JIT compilation options for partial matches. 229 | func (re *Regexp) Study(flags int) error { 230 | if re.extra != nil { 231 | return fmt.Errorf("Study: Regexp has already been optimized") 232 | } 233 | if flags == 0 { 234 | flags = STUDY_JIT_COMPILE 235 | } 236 | 237 | ptr := (*C.pcre)(unsafe.Pointer(&re.ptr[0])) 238 | var err *C.char 239 | extra := C.pcre_study(ptr, C.int(flags), &err) 240 | if err != nil { 241 | return fmt.Errorf("%s", C.GoString(err)) 242 | } 243 | if extra == nil { 244 | // Studying the pattern may not produce useful information. 245 | return nil 246 | } 247 | defer C.free(unsafe.Pointer(extra)) 248 | 249 | var size C.size_t 250 | rc := C.pcre_fullinfo(ptr, extra, C.PCRE_INFO_JITSIZE, unsafe.Pointer(&size)) 251 | if rc != 0 || size == 0 { 252 | return fmt.Errorf("Study failed to obtain JIT size (%d)", int(rc)) 253 | } 254 | re.extra = make([]byte, size) 255 | C.memcpy(unsafe.Pointer(&re.extra[0]), unsafe.Pointer(extra), size) 256 | return nil 257 | } 258 | 259 | // Groups returns the number of capture groups in the compiled pattern. 260 | func (re Regexp) Groups() int { 261 | if re.ptr == nil { 262 | panic("Regexp.Groups: uninitialized") 263 | } 264 | return int(pcreGroups((*C.pcre)(unsafe.Pointer(&re.ptr[0])))) 265 | } 266 | 267 | // Matcher objects provide a place for storing match results. 268 | // They can be created by the Matcher and MatcherString functions, 269 | // or they can be initialized with Reset or ResetString. 270 | type Matcher struct { 271 | re Regexp 272 | groups int 273 | ovector []C.int // scratch space for capture offsets 274 | matches bool // last match was successful 275 | partial bool // was the last match a partial match? 
276 | subjects string // one of these fields is set to record the subject, 277 | subjectb []byte // so that Group/GroupString can return slices 278 | } 279 | 280 | // NewMatcher creates a new matcher object for the given Regexp. 281 | func (re Regexp) NewMatcher() (m *Matcher) { 282 | m = new(Matcher) 283 | m.Init(&re) 284 | return 285 | } 286 | 287 | // Matcher creates a new matcher object, with the byte slice as subject. 288 | // It also starts a first match on subject. Test for success with Matches(). 289 | func (re Regexp) Matcher(subject []byte, flags int) (m *Matcher) { 290 | m = re.NewMatcher() 291 | m.Match(subject, flags) 292 | return 293 | } 294 | 295 | // MatcherString creates a new matcher, with the specified subject string. 296 | // It also starts a first match on subject. Test for success with Matches(). 297 | func (re Regexp) MatcherString(subject string, flags int) (m *Matcher) { 298 | m = re.NewMatcher() 299 | m.MatchString(subject, flags) 300 | return 301 | } 302 | 303 | // Reset switches the matcher object to the specified regexp and subject. 304 | // It also starts a first match on subject. 305 | func (m *Matcher) Reset(re Regexp, subject []byte, flags int) bool { 306 | m.Init(&re) 307 | return m.Match(subject, flags) 308 | } 309 | 310 | // ResetString switches the matcher object to the given regexp and subject. 311 | // It also starts a first match on subject. 312 | func (m *Matcher) ResetString(re Regexp, subject string, flags int) bool { 313 | m.Init(&re) 314 | return m.MatchString(subject, flags) 315 | } 316 | 317 | // Init binds an existing Matcher object to the given Regexp. 318 | func (m *Matcher) Init(re *Regexp) { 319 | if re.ptr == nil { 320 | panic("Matcher.Init: uninitialized") 321 | } 322 | m.matches = false 323 | if m.re.ptr != nil && &m.re.ptr[0] == &re.ptr[0] { 324 | // Skip group count extraction if the matcher has 325 | // already been initialized with the same regular 326 | // expression. 
327 | return 328 | } 329 | m.re = *re 330 | m.groups = re.Groups() 331 | if ovectorlen := 3 * (1 + m.groups); len(m.ovector) < ovectorlen { 332 | m.ovector = make([]C.int, ovectorlen) 333 | } 334 | } 335 | 336 | var nullbyte = []byte{0} 337 | 338 | // Match tries to match the specified byte slice to 339 | // the current pattern by calling Exec and collects the result. 340 | // Returns true if the match succeeds. 341 | func (m *Matcher) Match(subject []byte, flags int) bool { 342 | if m.re.ptr == nil { 343 | panic("Matcher.Match: uninitialized") 344 | } 345 | rc := m.Exec(subject, flags) 346 | m.matches = matched(rc) 347 | m.partial = (rc == ERROR_PARTIAL) 348 | return m.matches 349 | } 350 | 351 | // MatchString tries to match the specified subject string to 352 | // the current pattern by calling ExecString and collects the result. 353 | // Returns true if the match succeeds. 354 | func (m *Matcher) MatchString(subject string, flags int) bool { 355 | if m.re.ptr == nil { 356 | panic("Matcher.MatchString: uninitialized") 357 | } 358 | rc := m.ExecString(subject, flags) 359 | m.matches = matched(rc) 360 | m.partial = (rc == ERROR_PARTIAL) 361 | return m.matches 362 | } 363 | 364 | // Exec tries to match the specified byte slice to 365 | // the current pattern. Returns the raw pcre_exec error code. 366 | func (m *Matcher) Exec(subject []byte, flags int) int { 367 | if m.re.ptr == nil { 368 | panic("Matcher.Exec: uninitialized") 369 | } 370 | length := len(subject) 371 | m.subjects = "" 372 | m.subjectb = subject 373 | if length == 0 { 374 | subject = nullbyte // make first character adressable 375 | } 376 | subjectptr := (*C.char)(unsafe.Pointer(&subject[0])) 377 | return m.exec(subjectptr, length, flags) 378 | } 379 | 380 | // ExecString tries to match the specified subject string to 381 | // the current pattern. It returns the raw pcre_exec error code. 
382 | func (m *Matcher) ExecString(subject string, flags int) int { 383 | if m.re.ptr == nil { 384 | panic("Matcher.ExecString: uninitialized") 385 | } 386 | length := len(subject) 387 | m.subjects = subject 388 | m.subjectb = nil 389 | if length == 0 { 390 | subject = "\000" // make first character addressable 391 | } 392 | // The following is a non-portable kludge to avoid a copy 393 | subjectptr := *(**C.char)(unsafe.Pointer(&subject)) 394 | return m.exec(subjectptr, length, flags) 395 | } 396 | 397 | func (m *Matcher) exec(subjectptr *C.char, length, flags int) int { 398 | var extra *C.pcre_extra 399 | if m.re.extra != nil { 400 | extra = (*C.pcre_extra)(unsafe.Pointer(&m.re.extra[0])) 401 | } 402 | rc := C.pcre_exec((*C.pcre)(unsafe.Pointer(&m.re.ptr[0])), extra, 403 | subjectptr, C.int(length), 404 | 0, C.int(flags), &m.ovector[0], C.int(len(m.ovector))) 405 | return int(rc) 406 | } 407 | 408 | // matched checks the return code of a pattern match for success. 409 | func matched(rc int) bool { 410 | switch { 411 | case rc >= 0 || rc == C.PCRE_ERROR_PARTIAL: 412 | return true 413 | case rc == C.PCRE_ERROR_NOMATCH: 414 | return false 415 | case rc == C.PCRE_ERROR_BADOPTION: 416 | panic("PCRE.Match: invalid option flag") 417 | } 418 | panic("unexpected return code from pcre_exec: " + strconv.Itoa(rc)) 419 | } 420 | 421 | // Matches returns true if a previous call to Matcher, MatcherString, Reset, 422 | // ResetString, Match or MatchString succeeded. 423 | func (m *Matcher) Matches() bool { 424 | return m.matches 425 | } 426 | 427 | // Partial returns true if a previous call to Matcher, MatcherString, Reset, 428 | // ResetString, Match or MatchString found a partial match. 429 | func (m *Matcher) Partial() bool { 430 | return m.partial 431 | } 432 | 433 | // Groups returns the number of groups in the current pattern. 
434 | func (m *Matcher) Groups() int { 435 | return m.groups 436 | } 437 | 438 | // Present returns true if the numbered capture group is present in the last 439 | // match (performed by Matcher, MatcherString, Reset, ResetString, 440 | // Match, or MatchString). Group numbers start at 1. A capture group 441 | // can be present and match the empty string. 442 | func (m *Matcher) Present(group int) bool { 443 | return m.ovector[2*group] >= 0 444 | } 445 | 446 | // Group returns the numbered capture group of the last match (performed by 447 | // Matcher, MatcherString, Reset, ResetString, Match, or MatchString). 448 | // Group 0 is the part of the subject which matches the whole pattern; 449 | // the first actual capture group is numbered 1. Capture groups which 450 | // are not present return a nil slice. 451 | func (m *Matcher) Group(group int) []byte { 452 | start := m.ovector[2*group] 453 | end := m.ovector[2*group+1] 454 | if start >= 0 { 455 | if m.subjectb != nil { 456 | return m.subjectb[start:end] 457 | } 458 | return []byte(m.subjects[start:end]) 459 | } 460 | return nil 461 | } 462 | 463 | // Extract returns a slice of byte slices for a single match. 464 | // The first byte slice contains the complete match. 465 | // Subsequent byte slices contain the captured groups. 466 | // If there was no match then nil is returned. 467 | func (m *Matcher) Extract() [][]byte { 468 | if !m.matches { 469 | return nil 470 | } 471 | extract := make([][]byte, m.groups+1) 472 | extract[0] = m.subjectb 473 | for i := 1; i <= m.groups; i++ { 474 | x0 := m.ovector[2*i] 475 | x1 := m.ovector[2*i+1] 476 | extract[i] = m.subjectb[x0:x1] 477 | } 478 | return extract 479 | } 480 | 481 | // ExtractString returns a slice of strings for a single match. 482 | // The first string contains the complete match. 483 | // Subsequent strings in the slice contain the captured groups. 484 | // If there was no match then nil is returned. 
485 | func (m *Matcher) ExtractString() []string { 486 | if !m.matches { 487 | return nil 488 | } 489 | extract := make([]string, m.groups+1) 490 | extract[0] = m.subjects 491 | for i := 1; i <= m.groups; i++ { 492 | x0 := m.ovector[2*i] 493 | x1 := m.ovector[2*i+1] 494 | extract[i] = m.subjects[x0:x1] 495 | } 496 | return extract 497 | } 498 | 499 | // GroupIndices returns the numbered capture group positions of the last 500 | // match (performed by Matcher, MatcherString, Reset, ResetString, Match, 501 | // or MatchString). Group 0 is the part of the subject which matches 502 | // the whole pattern; the first actual capture group is numbered 1. 503 | // Capture groups which are not present return a nil slice. 504 | func (m *Matcher) GroupIndices(group int) []int { 505 | start := m.ovector[2*group] 506 | end := m.ovector[2*group+1] 507 | if start >= 0 { 508 | return []int{int(start), int(end)} 509 | } 510 | return nil 511 | } 512 | 513 | // GroupString returns the numbered capture group as a string. Group 0 514 | // is the part of the subject which matches the whole pattern; the first 515 | // actual capture group is numbered 1. Capture groups which are not 516 | // present return an empty string. 517 | func (m *Matcher) GroupString(group int) string { 518 | start := m.ovector[2*group] 519 | end := m.ovector[2*group+1] 520 | if start >= 0 { 521 | if m.subjectb != nil { 522 | return string(m.subjectb[start:end]) 523 | } 524 | return m.subjects[start:end] 525 | } 526 | return "" 527 | } 528 | 529 | // Index returns the start and end of the first match, if a previous 530 | // call to Matcher, MatcherString, Reset, ResetString, Match or 531 | // MatchString succeeded. loc[0] is the start and loc[1] is the end. 532 | func (m *Matcher) Index() (loc []int) { 533 | if !m.matches { 534 | return nil 535 | } 536 | loc = []int{int(m.ovector[0]), int(m.ovector[1])} 537 | return 538 | } 539 | 540 | // name2index converts a group name to its group index number. 
541 | func (m *Matcher) name2index(name string) (int, error) { 542 | if m.re.ptr == nil { 543 | return 0, fmt.Errorf("Matcher.Named: uninitialized") 544 | } 545 | name1 := C.CString(name) 546 | defer C.free(unsafe.Pointer(name1)) 547 | group := int(C.pcre_get_stringnumber( 548 | (*C.pcre)(unsafe.Pointer(&m.re.ptr[0])), name1)) 549 | if group < 0 { 550 | return group, fmt.Errorf("Matcher.Named: unknown name: " + name) 551 | } 552 | return group, nil 553 | } 554 | 555 | // Named returns the value of the named capture group. 556 | // This is a nil slice if the capture group is not present. 557 | // If the name does not refer to a group then error is non-nil. 558 | func (m *Matcher) Named(group string) ([]byte, error) { 559 | groupNum, err := m.name2index(group) 560 | if err != nil { 561 | return []byte{}, err 562 | } 563 | return m.Group(groupNum), nil 564 | } 565 | 566 | // NamedString returns the value of the named capture group, 567 | // or an empty string if the capture group is not present. 568 | // If the name does not refer to a group then error is non-nil. 569 | func (m *Matcher) NamedString(group string) (string, error) { 570 | groupNum, err := m.name2index(group) 571 | if err != nil { 572 | return "", err 573 | } 574 | return m.GroupString(groupNum), nil 575 | } 576 | 577 | // NamedPresent returns true if the named capture group is present. 578 | // If the name does not refer to a group then error is non-nil. 579 | func (m *Matcher) NamedPresent(group string) (bool, error) { 580 | groupNum, err := m.name2index(group) 581 | if err != nil { 582 | return false, err 583 | } 584 | return m.Present(groupNum), nil 585 | } 586 | 587 | // FindIndex returns the start and end of the first match, 588 | // or nil if no match. loc[0] is the start and loc[1] is the end. 
589 | func (re *Regexp) FindIndex(bytes []byte, flags int) (loc []int) { 590 | m := re.Matcher(bytes, flags) 591 | if m.Matches() { 592 | loc = []int{int(m.ovector[0]), int(m.ovector[1])} 593 | return 594 | } 595 | return nil 596 | } 597 | 598 | // ReplaceAll returns a copy of a byte slice 599 | // where all pattern matches are replaced by repl. 600 | func (re Regexp) ReplaceAll(bytes, repl []byte, flags int) []byte { 601 | m := re.Matcher(bytes, flags) 602 | r := []byte{} 603 | for m.matches { 604 | r = append(append(r, bytes[:m.ovector[0]]...), repl...) 605 | bytes = bytes[m.ovector[1]:] 606 | m.Match(bytes, flags) 607 | } 608 | return append(r, bytes...) 609 | } 610 | 611 | // ReplaceAllString is equivalent to ReplaceAll with string return type. 612 | func (re Regexp) ReplaceAllString(in, repl string, flags int) string { 613 | return string(re.ReplaceAll([]byte(in), []byte(repl), flags)) 614 | } 615 | 616 | // CompileError holds details about a compilation error, 617 | // as returned by the Compile function. The offset is 618 | // the byte position in the pattern string at which the 619 | // error was detected. 
620 | type CompileError struct { 621 | Pattern string // The failed pattern 622 | Message string // The error message 623 | Offset int // Byte position of error 624 | } 625 | 626 | // Error converts a compile error to a string 627 | func (e *CompileError) Error() string { 628 | return e.Pattern + " (" + strconv.Itoa(e.Offset) + "): " + e.Message 629 | } 630 | -------------------------------------------------------------------------------- /pcre_fallback.h: -------------------------------------------------------------------------------- 1 | #ifndef PCRE_NEVER_UTF 2 | #define PCRE_NEVER_UTF 0x0 3 | #endif 4 | -------------------------------------------------------------------------------- /pcre_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2011 Florian Weimer 2 | 3 | package pcre 4 | 5 | import ( 6 | "testing" 7 | ) 8 | 9 | func TestCompile(t *testing.T) { 10 | var check = func(p string, groups int) { 11 | re, err := Compile(p, 0) 12 | if err != nil { 13 | t.Error(p, err) 14 | } 15 | if g := re.Groups(); g != groups { 16 | t.Error(p, g) 17 | } 18 | } 19 | check("", 0) 20 | check("^", 0) 21 | check("^$", 0) 22 | check("()", 1) 23 | check("(())", 2) 24 | check("((?:))", 1) 25 | } 26 | 27 | func TestCompileFail(t *testing.T) { 28 | var check = func(p, msg string, off int) { 29 | _, err := Compile(p, 0) 30 | if err == nil { 31 | t.Error(p) 32 | } else { 33 | cerr := err.(*CompileError) 34 | switch { 35 | case cerr.Message != msg: 36 | t.Error(p, "Message", cerr.Message) 37 | case cerr.Offset != off: 38 | t.Error(p, "Offset", cerr.Offset) 39 | } 40 | } 41 | } 42 | check("(", "missing )", 1) 43 | check("\\", "\\ at end of pattern", 1) 44 | check("abc\\", "\\ at end of pattern", 4) 45 | check("abc\000", "NUL byte in pattern", 3) 46 | check("a\000bc", "NUL byte in pattern", 1) 47 | } 48 | 49 | func strings(b [][]byte) (r []string) { 50 | r = make([]string, len(b)) 51 | for i, v := range b { 52 | r[i] = 
string(v) 53 | } 54 | return 55 | } 56 | 57 | func equal(l, r []string) bool { 58 | if len(l) != len(r) { 59 | return false 60 | } 61 | for i, lv := range l { 62 | if lv != r[i] { 63 | return false 64 | } 65 | } 66 | return true 67 | } 68 | 69 | func checkmatch1(t *testing.T, dostring bool, m *Matcher, 70 | pattern, subject string, args ...interface{}) { 71 | re := MustCompile(pattern, 0) 72 | var prefix string 73 | if dostring { 74 | if m == nil { 75 | m = re.MatcherString(subject, 0) 76 | } else { 77 | m.ResetString(re, subject, 0) 78 | } 79 | prefix = "string" 80 | } else { 81 | if m == nil { 82 | m = re.Matcher([]byte(subject), 0) 83 | } else { 84 | m.Reset(re, []byte(subject), 0) 85 | } 86 | prefix = "[]byte" 87 | } 88 | if len(args) == 0 { 89 | if m.Matches() { 90 | t.Error(prefix, pattern, subject, "!Matches") 91 | } 92 | } else { 93 | if !m.Matches() { 94 | t.Error(prefix, pattern, subject, "Matches") 95 | return 96 | } 97 | if m.Groups() != len(args)-1 { 98 | t.Error(prefix, pattern, subject, "Groups", m.Groups()) 99 | return 100 | } 101 | for i, arg := range args { 102 | if s, ok := arg.(string); ok { 103 | if !m.Present(i) { 104 | t.Error(prefix, pattern, subject, 105 | "Present", i) 106 | 107 | } 108 | if g := string(m.Group(i)); g != s { 109 | t.Error(prefix, pattern, subject, 110 | "Group", i, g, "!=", s) 111 | } 112 | if g := m.GroupString(i); g != s { 113 | t.Error(prefix, pattern, subject, 114 | "GroupString", i, g, "!=", s) 115 | } 116 | } else { 117 | if m.Present(i) { 118 | t.Error(prefix, pattern, subject, 119 | "!Present", i) 120 | } 121 | } 122 | } 123 | } 124 | } 125 | 126 | func TestMatcher(t *testing.T) { 127 | var m Matcher 128 | check := func(pattern, subject string, args ...interface{}) { 129 | checkmatch1(t, false, nil, pattern, subject, args...) 130 | checkmatch1(t, true, nil, pattern, subject, args...) 131 | checkmatch1(t, false, &m, pattern, subject, args...) 132 | checkmatch1(t, true, &m, pattern, subject, args...) 
133 | } 134 | 135 | check(`^$`, "", "") 136 | check(`^abc$`, "abc", "abc") 137 | check(`^(X)*ab(c)$`, "abc", "abc", nil, "c") 138 | check(`^(X)*ab()c$`, "abc", "abc", nil, "") 139 | check(`^.*$`, "abc", "abc") 140 | check(`^.*$`, "a\000c", "a\000c") 141 | check(`^(.*)$`, "a\000c", "a\000c", "a\000c") 142 | check(`def`, "abcdefghi", "def") 143 | } 144 | 145 | func TestPartial(t *testing.T) { 146 | re := MustCompile(`^abc`, 0) 147 | 148 | // Check we get a partial match when we should 149 | m := re.MatcherString("ab", PARTIAL_SOFT) 150 | if !m.Matches() { 151 | t.Error("Failed to find any matches") 152 | } else if !m.Partial() { 153 | t.Error("The match was not partial") 154 | } 155 | 156 | // Check we get an exact match when we should 157 | m = re.MatcherString("abc", PARTIAL_SOFT) 158 | if !m.Matches() { 159 | t.Error("Failed to find any matches") 160 | } else if m.Partial() { 161 | t.Error("Match was partial but should have been exact") 162 | } 163 | 164 | m = re.Matcher([]byte("ab"), PARTIAL_SOFT) 165 | if !m.Matches() { 166 | t.Error("Failed to find any matches") 167 | } else if !m.Partial() { 168 | t.Error("The match was not partial") 169 | } 170 | 171 | m = re.Matcher([]byte("abc"), PARTIAL_SOFT) 172 | if !m.Matches() { 173 | t.Error("Failed to find any matches") 174 | } else if m.Partial() { 175 | t.Error("Match was partial but should have been exact") 176 | } 177 | } 178 | 179 | func TestCaseless(t *testing.T) { 180 | m := MustCompile("abc", CASELESS).MatcherString("...Abc...", 0) 181 | if !m.Matches() { 182 | t.Error("CASELESS") 183 | } 184 | m = MustCompile("abc", 0).MatcherString("Abc", 0) 185 | if m.Matches() { 186 | t.Error("!CASELESS") 187 | } 188 | } 189 | 190 | func TestNamed(t *testing.T) { 191 | pattern := "(?a)(?X)*bc(?\\d*)" 192 | m := MustCompile(pattern, 0).MatcherString("abc12", 0) 193 | if !m.Matches() { 194 | t.Error("Matches") 195 | } 196 | if ok, err := m.NamedPresent("L"); !ok || err != nil { 197 | t.Errorf("NamedPresent(\"L\"): %v", err) 
198 | } 199 | if ok, err := m.NamedPresent("M"); ok || err != nil { 200 | t.Errorf("NamedPresent(\"M\"): %v", err) 201 | } 202 | if ok, err := m.NamedPresent("DIGITS"); !ok || err != nil { 203 | t.Errorf("NamedPresent(\"DIGITS\"): %v", err) 204 | } 205 | if str, err := m.NamedString("DIGITS"); str != "12" || err != nil { 206 | t.Errorf("NamedString(\"DIGITS\"): %v", err) 207 | } 208 | } 209 | 210 | func TestMatcherIndex(t *testing.T) { 211 | m := MustCompile("bcd", 0).Matcher([]byte("abcdef"), 0) 212 | i := m.Index() 213 | if i[0] != 1 { 214 | t.Error("FindIndex start", i[0]) 215 | } 216 | if i[1] != 4 { 217 | t.Error("FindIndex end", i[1]) 218 | } 219 | 220 | m = MustCompile("xyz", 0).Matcher([]byte("abcdef"), 0) 221 | i = m.Index() 222 | if i != nil { 223 | t.Error("Index returned for non-match", i) 224 | } 225 | } 226 | 227 | func TestFindIndex(t *testing.T) { 228 | re := MustCompile("bcd", 0) 229 | i := re.FindIndex([]byte("abcdef"), 0) 230 | if i[0] != 1 { 231 | t.Error("FindIndex start", i[0]) 232 | } 233 | if i[1] != 4 { 234 | t.Error("FindIndex end", i[1]) 235 | } 236 | } 237 | 238 | func TestExtract(t *testing.T) { 239 | re := MustCompile("b(c)(d)", 0) 240 | m := re.MatcherString("abcdef", 0) 241 | i := m.ExtractString() 242 | if i[0] != "abcdef" { 243 | t.Error("Full line unavailable: ", i[0]) 244 | } 245 | if i[1] != "c" { 246 | t.Error("First match group no as expected: ", i[1]) 247 | } 248 | if i[2] != "d" { 249 | t.Error("Second match group no as expected: ", i[2]) 250 | } 251 | } 252 | 253 | func TestReplaceAll(t *testing.T) { 254 | re := MustCompile("foo", 0) 255 | // Don't change at ends. 256 | result := re.ReplaceAll([]byte("I like foods."), []byte("car"), 0) 257 | if string(result) != "I like cards." { 258 | t.Error("ReplaceAll", result) 259 | } 260 | // Change at ends. 
261 | result = re.ReplaceAll([]byte("food fight fools foo"), []byte("car"), 0) 262 | if string(result) != "card fight carls car" { 263 | t.Error("ReplaceAll2", result) 264 | } 265 | } 266 | --------------------------------------------------------------------------------