├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── pcre.go
├── pcre_fallback.h
└── pcre_test.go


/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | [0-9].out
3 | *.[0-9]
4 | *.cgo?.*
5 | _cgo_*
6 | _obj
7 | _test
8 | _testmain.go
9 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: go
 2 | sudo: false
 3 | addons:
 4 |   apt:
 5 |     packages:
 6 |       - libpcre++-dev
 7 | go:
 8 |   - 1.3.3
 9 |   - 1.4.1
10 |   - 1.4.2
11 |   - 1.4.3
12 |   - 1.5.1
13 | script:
14 |   - go test -v ./...
15 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The Go interface for PCRE is licensed according to the terms below.
 2 | Note that PCRE and the Go implementation come with different terms.
 3 | 
 4 | Copyright (c) 2011 Florian Weimer. All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are
 8 | met:
 9 | 
10 | * Redistributions of source code must retain the above copyright
11 |   notice, this list of conditions and the following disclaimer.
12 | 
13 | * Redistributions in binary form must reproduce the above copyright
14 |   notice, this list of conditions and the following disclaimer in the
15 |   documentation and/or other materials provided with the distribution.
16 | 
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # go-pcre
 2 | 
 3 | [![GoDoc](https://godoc.org/github.com/gijsbers/go-pcre?status.svg)](https://godoc.org/github.com/gijsbers/go-pcre)
 4 | 
 5 | This is a Go language package providing support for
 6 | Perl Compatible Regular Expressions (PCRE).
 7 | 
 8 | ## Installation
 9 | 
10 | Install the package for Debian as follows:
11 | 
12 |     sudo apt-get install libpcre++-dev
13 |     go get github.com/gijsbers/go-pcre
14 | 
15 | ## Usage
16 | 
17 | Go programs that depend on this package should import
18 | this package as follows to allow automatic downloading:
19 | 
20 |     import "github.com/gijsbers/go-pcre"
21 | 
22 | ## History
23 | 
24 | This is a clone of
25 | [golang-pkg-pcre](http://git.enyo.de/fw/debian/golang-pkg-pcre.git)
26 | by Florian Weimer, which has been placed on Github by Glenn Brown,
27 | so it can be fetched automatically by Go's package installer.
28 | 
29 | Glenn Brown added `FindIndex()` and `ReplaceAll()`
30 | to mimic functions in Go's default regexp package.
31 | 
32 | Mathieu Payeur Levallois added `Matcher.ExtractString()`.
33 | 
34 | Malte Nuhn added `GroupIndices()` to retrieve positions of a matching group.
35 | 
36 | Chandra Sekar S added `Index()` and stopped invoking `Match()` twice in `FindIndex()`.
37 | 
38 | Misakwa added support for `pkg-config` to locate `libpcre`.
39 | 
40 | Yann Ramin added `ReplaceAllString()` and changed `Compile()` return type to `error`.
41 | 
42 | Nikolay Sivko modified `name2index()` to return error instead of panic.
43 | 
44 | Harry Waye exposed raw `pcre_exec`.
45 | 
46 | Hazzadous added partial match support.
47 | 
48 | Pavel Gryaznov added support for JIT compilation.
49 | 


--------------------------------------------------------------------------------
/pcre.go:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2011 Florian Weimer. All rights reserved.
  2 | //
  3 | // Redistribution and use in source and binary forms, with or without
  4 | // modification, are permitted provided that the following conditions are
  5 | // met:
  6 | //
  7 | // * Redistributions of source code must retain the above copyright
  8 | //   notice, this list of conditions and the following disclaimer.
  9 | //
 10 | // * Redistributions in binary form must reproduce the above copyright
 11 | //   notice, this list of conditions and the following disclaimer in the
 12 | //   documentation and/or other materials provided with the distribution.
 13 | //
 14 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 15 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 16 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 17 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 18 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 19 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 20 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 21 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 22 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 23 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 24 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 | 
 26 | // Package pcre provides access to the Perl Compatible Regular
 27 | // Expression library, PCRE.
 28 | //
 29 | // It implements two main types, Regexp and Matcher.  Regexp objects
 30 | // store a compiled regular expression. They consist of two immutable
 31 | // parts: pcre and pcre_extra. Compile()/MustCompile() initialize pcre.
 32 | // Calling Study() on a compiled Regexp initializes pcre_extra.
 33 | // Compilation of regular expressions using Compile or MustCompile is
 34 | // slightly expensive, so these objects should be kept and reused,
 35 | // instead of compiling them from scratch for each matching attempt.
 36 | // CompileJIT and MustCompileJIT are way more expensive, because they
 37 | // run Study() after compiling a Regexp, but they tend to give
 38 | // much better performance:
 39 | // http://sljit.sourceforge.net/regex_perf.html
 40 | //
 41 | // Matcher objects keep the results of a match against a []byte or
 42 | // string subject.  The Group and GroupString functions provide access
 43 | // to capture groups; both versions work no matter if the subject was a
 44 | // []byte or string, but the version with the matching type is slightly
 45 | // more efficient.
 46 | //
 47 | // Matcher objects contain some temporary space and refer to the original
 48 | // subject.  They are mutable and can be reused (using Match,
 49 | // MatchString, Reset or ResetString).
 50 | //
 51 | // For details on the regular expression language implemented by this
 52 | // package and the flags defined below, see the PCRE documentation.
 53 | // http://www.pcre.org/pcre.txt
 54 | package pcre
 55 | 
 56 | // #cgo pkg-config: libpcre
 57 | // #include <pcre.h>
 58 | // #include <string.h>
 59 | // #include "./pcre_fallback.h"
 60 | import "C"
 61 | 
 62 | import (
 63 | 	"fmt"
 64 | 	"strconv"
 65 | 	"unsafe"
 66 | )
 67 | 
// Flags for Compile and Match functions.
// Each constant re-exports the C macro of the same name with the PCRE_
// prefix, as made visible through the cgo preamble of this file.
const (
	ANCHORED          = C.PCRE_ANCHORED
	BSR_ANYCRLF       = C.PCRE_BSR_ANYCRLF
	BSR_UNICODE       = C.PCRE_BSR_UNICODE
	NEWLINE_ANY       = C.PCRE_NEWLINE_ANY
	NEWLINE_ANYCRLF   = C.PCRE_NEWLINE_ANYCRLF
	NEWLINE_CR        = C.PCRE_NEWLINE_CR
	NEWLINE_CRLF      = C.PCRE_NEWLINE_CRLF
	NEWLINE_LF        = C.PCRE_NEWLINE_LF
	NO_START_OPTIMIZE = C.PCRE_NO_START_OPTIMIZE
	NO_UTF8_CHECK     = C.PCRE_NO_UTF8_CHECK
)
 81 | 
// Flags for Compile functions.
// Each constant re-exports the C macro of the same name with the PCRE_
// prefix, as made visible through the cgo preamble of this file.
const (
	CASELESS          = C.PCRE_CASELESS
	DOLLAR_ENDONLY    = C.PCRE_DOLLAR_ENDONLY
	DOTALL            = C.PCRE_DOTALL
	DUPNAMES          = C.PCRE_DUPNAMES
	EXTENDED          = C.PCRE_EXTENDED
	EXTRA             = C.PCRE_EXTRA
	FIRSTLINE         = C.PCRE_FIRSTLINE
	JAVASCRIPT_COMPAT = C.PCRE_JAVASCRIPT_COMPAT
	MULTILINE         = C.PCRE_MULTILINE
	NEVER_UTF         = C.PCRE_NEVER_UTF // pcre_fallback.h defines this as 0x0 when it is otherwise undefined
	NO_AUTO_CAPTURE   = C.PCRE_NO_AUTO_CAPTURE
	UNGREEDY          = C.PCRE_UNGREEDY
	UTF8              = C.PCRE_UTF8
	UCP               = C.PCRE_UCP
)
 99 | 
// Flags for Match functions.
// Each constant re-exports the C macro of the same name with the PCRE_
// prefix, as made visible through the cgo preamble of this file.
const (
	NOTBOL           = C.PCRE_NOTBOL
	NOTEOL           = C.PCRE_NOTEOL
	NOTEMPTY         = C.PCRE_NOTEMPTY
	NOTEMPTY_ATSTART = C.PCRE_NOTEMPTY_ATSTART
	PARTIAL_HARD     = C.PCRE_PARTIAL_HARD
	PARTIAL_SOFT     = C.PCRE_PARTIAL_SOFT
)
109 | 
// Flags for Study function.
// Study substitutes STUDY_JIT_COMPILE when it is called with flags == 0.
const (
	STUDY_JIT_COMPILE              = C.PCRE_STUDY_JIT_COMPILE
	STUDY_JIT_PARTIAL_SOFT_COMPILE = C.PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
	STUDY_JIT_PARTIAL_HARD_COMPILE = C.PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
)
116 | 
// Exec-time and get/set-time error codes.
// These are the raw values that Exec and ExecString may return.  Within
// this package, matched treats ERROR_PARTIAL as a successful match and
// ERROR_NOMATCH as a failed one; every other negative code makes it panic.
const (
	ERROR_NOMATCH        = C.PCRE_ERROR_NOMATCH
	ERROR_NULL           = C.PCRE_ERROR_NULL
	ERROR_BADOPTION      = C.PCRE_ERROR_BADOPTION
	ERROR_BADMAGIC       = C.PCRE_ERROR_BADMAGIC
	ERROR_UNKNOWN_OPCODE = C.PCRE_ERROR_UNKNOWN_OPCODE
	ERROR_UNKNOWN_NODE   = C.PCRE_ERROR_UNKNOWN_NODE
	ERROR_NOMEMORY       = C.PCRE_ERROR_NOMEMORY
	ERROR_NOSUBSTRING    = C.PCRE_ERROR_NOSUBSTRING
	ERROR_MATCHLIMIT     = C.PCRE_ERROR_MATCHLIMIT
	ERROR_CALLOUT        = C.PCRE_ERROR_CALLOUT
	ERROR_BADUTF8        = C.PCRE_ERROR_BADUTF8
	ERROR_BADUTF8_OFFSET = C.PCRE_ERROR_BADUTF8_OFFSET
	ERROR_PARTIAL        = C.PCRE_ERROR_PARTIAL
	ERROR_BADPARTIAL     = C.PCRE_ERROR_BADPARTIAL
	ERROR_RECURSIONLIMIT = C.PCRE_ERROR_RECURSIONLIMIT
	ERROR_INTERNAL       = C.PCRE_ERROR_INTERNAL
	ERROR_BADCOUNT       = C.PCRE_ERROR_BADCOUNT
	ERROR_JIT_STACKLIMIT = C.PCRE_ERROR_JIT_STACKLIMIT
)
138 | 
// Regexp holds a reference to a compiled regular expression.
// Use Compile or MustCompile to create such objects.
// A zero Regexp (nil ptr) is treated as uninitialized by the methods
// that check for it.
type Regexp struct {
	ptr   []byte // copy of the compiled pattern on the Go heap (see toHeap)
	extra []byte // copy of the pcre_study result; stays nil until Study stores one
}
145 | 
146 | // Number of bytes in the compiled pattern
147 | func pcreSize(ptr *C.pcre) (size C.size_t) {
148 | 	C.pcre_fullinfo(ptr, nil, C.PCRE_INFO_SIZE, unsafe.Pointer(&size))
149 | 	return
150 | }
151 | 
152 | // Number of capture groups
153 | func pcreGroups(ptr *C.pcre) (count C.int) {
154 | 	C.pcre_fullinfo(ptr, nil,
155 | 		C.PCRE_INFO_CAPTURECOUNT, unsafe.Pointer(&count))
156 | 	return
157 | }
158 | 
159 | // Move pattern to the Go heap so that we do not have to use a
160 | // finalizer.  PCRE patterns are fully relocatable. (We do not use
161 | // custom character tables.)
162 | func toHeap(ptr *C.pcre) (re Regexp) {
163 | 	defer C.free(unsafe.Pointer(ptr))
164 | 	size := pcreSize(ptr)
165 | 	re.ptr = make([]byte, size)
166 | 	C.memcpy(unsafe.Pointer(&re.ptr[0]), unsafe.Pointer(ptr), size)
167 | 	return
168 | }
169 | 
170 | // Compile the pattern and return a compiled regexp.
171 | // If compilation fails, the second return value holds a *CompileError.
172 | func Compile(pattern string, flags int) (Regexp, error) {
173 | 	pattern1 := C.CString(pattern)
174 | 	defer C.free(unsafe.Pointer(pattern1))
175 | 	if clen := int(C.strlen(pattern1)); clen != len(pattern) {
176 | 		return Regexp{}, &CompileError{
177 | 			Pattern: pattern,
178 | 			Message: "NUL byte in pattern",
179 | 			Offset:  clen,
180 | 		}
181 | 	}
182 | 	var errptr *C.char
183 | 	var erroffset C.int
184 | 	ptr := C.pcre_compile(pattern1, C.int(flags), &errptr, &erroffset, nil)
185 | 	if ptr == nil {
186 | 		return Regexp{}, &CompileError{
187 | 			Pattern: pattern,
188 | 			Message: C.GoString(errptr),
189 | 			Offset:  int(erroffset),
190 | 		}
191 | 	}
192 | 	heap := toHeap(ptr)
193 | 	return heap, nil
194 | }
195 | 
196 | // CompileJIT is a combination of Compile and Study. It first compiles
197 | // the pattern and if this succeeds calls Study on the compiled pattern.
198 | // comFlags are Compile flags, jitFlags are study flags.
199 | // If compilation fails, the second return value holds a *CompileError.
200 | func CompileJIT(pattern string, comFlags, jitFlags int) (Regexp, error) {
201 | 	re, err := Compile(pattern, comFlags)
202 | 	if err == nil {
203 | 		err = (&re).Study(jitFlags)
204 | 	}
205 | 	return re, err
206 | }
207 | 
208 | // MustCompile compiles the pattern.  If compilation fails, panic.
209 | func MustCompile(pattern string, flags int) (re Regexp) {
210 | 	re, err := Compile(pattern, flags)
211 | 	if err != nil {
212 | 		panic(err)
213 | 	}
214 | 	return
215 | }
216 | 
217 | // MustCompileJIT compiles and studies the pattern.  On failure it panics.
218 | func MustCompileJIT(pattern string, comFlags, jitFlags int) (re Regexp) {
219 | 	re, err := CompileJIT(pattern, comFlags, jitFlags)
220 | 	if err != nil {
221 | 		panic(err)
222 | 	}
223 | 	return
224 | }
225 | 
226 | // Study adds Just-In-Time compilation to a Regexp. This may give a huge
227 | // speed boost when matching. If an error occurs, return value is non-nil.
228 | // Flags optionally specifies JIT compilation options for partial matches.
229 | func (re *Regexp) Study(flags int) error {
230 | 	if re.extra != nil {
231 | 		return fmt.Errorf("Study: Regexp has already been optimized")
232 | 	}
233 | 	if flags == 0 {
234 | 		flags = STUDY_JIT_COMPILE
235 | 	}
236 | 
237 | 	ptr := (*C.pcre)(unsafe.Pointer(&re.ptr[0]))
238 | 	var err *C.char
239 | 	extra := C.pcre_study(ptr, C.int(flags), &err)
240 | 	if err != nil {
241 | 		return fmt.Errorf("%s", C.GoString(err))
242 | 	}
243 | 	if extra == nil {
244 | 		// Studying the pattern may not produce useful information.
245 | 		return nil
246 | 	}
247 | 	defer C.free(unsafe.Pointer(extra))
248 | 
249 | 	var size C.size_t
250 | 	rc := C.pcre_fullinfo(ptr, extra, C.PCRE_INFO_JITSIZE, unsafe.Pointer(&size))
251 | 	if rc != 0 || size == 0 {
252 | 		return fmt.Errorf("Study failed to obtain JIT size (%d)", int(rc))
253 | 	}
254 | 	re.extra = make([]byte, size)
255 | 	C.memcpy(unsafe.Pointer(&re.extra[0]), unsafe.Pointer(extra), size)
256 | 	return nil
257 | }
258 | 
259 | // Groups returns the number of capture groups in the compiled pattern.
260 | func (re Regexp) Groups() int {
261 | 	if re.ptr == nil {
262 | 		panic("Regexp.Groups: uninitialized")
263 | 	}
264 | 	return int(pcreGroups((*C.pcre)(unsafe.Pointer(&re.ptr[0]))))
265 | }
266 | 
// Matcher objects provide a place for storing match results.
// They can be created by the Matcher and MatcherString functions,
// or they can be initialized with Reset or ResetString.
type Matcher struct {
	re       Regexp  // copy of the Regexp this matcher was bound to by Init
	groups   int     // capture group count of re, cached by Init
	ovector  []C.int // scratch space for capture offsets
	matches  bool    // last match was successful
	partial  bool    // was the last match a partial match?
	subjects string  // one of these fields is set to record the subject,
	subjectb []byte  // so that Group/GroupString can return slices
}
279 | 
280 | // NewMatcher creates a new matcher object for the given Regexp.
281 | func (re Regexp) NewMatcher() (m *Matcher) {
282 | 	m = new(Matcher)
283 | 	m.Init(&re)
284 | 	return
285 | }
286 | 
287 | // Matcher creates a new matcher object, with the byte slice as subject.
288 | // It also starts a first match on subject. Test for success with Matches().
289 | func (re Regexp) Matcher(subject []byte, flags int) (m *Matcher) {
290 | 	m = re.NewMatcher()
291 | 	m.Match(subject, flags)
292 | 	return
293 | }
294 | 
295 | // MatcherString creates a new matcher, with the specified subject string.
296 | // It also starts a first match on subject. Test for success with Matches().
297 | func (re Regexp) MatcherString(subject string, flags int) (m *Matcher) {
298 | 	m = re.NewMatcher()
299 | 	m.MatchString(subject, flags)
300 | 	return
301 | }
302 | 
303 | // Reset switches the matcher object to the specified regexp and subject.
304 | // It also starts a first match on subject.
305 | func (m *Matcher) Reset(re Regexp, subject []byte, flags int) bool {
306 | 	m.Init(&re)
307 | 	return m.Match(subject, flags)
308 | }
309 | 
310 | // ResetString switches the matcher object to the given regexp and subject.
311 | // It also starts a first match on subject.
312 | func (m *Matcher) ResetString(re Regexp, subject string, flags int) bool {
313 | 	m.Init(&re)
314 | 	return m.MatchString(subject, flags)
315 | }
316 | 
317 | // Init binds an existing Matcher object to the given Regexp.
318 | func (m *Matcher) Init(re *Regexp) {
319 | 	if re.ptr == nil {
320 | 		panic("Matcher.Init: uninitialized")
321 | 	}
322 | 	m.matches = false
323 | 	if m.re.ptr != nil && &m.re.ptr[0] == &re.ptr[0] {
324 | 		// Skip group count extraction if the matcher has
325 | 		// already been initialized with the same regular
326 | 		// expression.
327 | 		return
328 | 	}
329 | 	m.re = *re
330 | 	m.groups = re.Groups()
331 | 	if ovectorlen := 3 * (1 + m.groups); len(m.ovector) < ovectorlen {
332 | 		m.ovector = make([]C.int, ovectorlen)
333 | 	}
334 | }
335 | 
// nullbyte gives Exec an addressable first byte to point at when the
// subject slice is empty.
var nullbyte = []byte{0}
337 | 
338 | // Match tries to match the specified byte slice to
339 | // the current pattern by calling Exec and collects the result.
340 | // Returns true if the match succeeds.
341 | func (m *Matcher) Match(subject []byte, flags int) bool {
342 | 	if m.re.ptr == nil {
343 | 		panic("Matcher.Match: uninitialized")
344 | 	}
345 | 	rc := m.Exec(subject, flags)
346 | 	m.matches = matched(rc)
347 | 	m.partial = (rc == ERROR_PARTIAL)
348 | 	return m.matches
349 | }
350 | 
351 | // MatchString tries to match the specified subject string to
352 | // the current pattern by calling ExecString and collects the result.
353 | // Returns true if the match succeeds.
354 | func (m *Matcher) MatchString(subject string, flags int) bool {
355 | 	if m.re.ptr == nil {
356 | 		panic("Matcher.MatchString: uninitialized")
357 | 	}
358 | 	rc := m.ExecString(subject, flags)
359 | 	m.matches = matched(rc)
360 | 	m.partial = (rc == ERROR_PARTIAL)
361 | 	return m.matches
362 | }
363 | 
364 | // Exec tries to match the specified byte slice to
365 | // the current pattern. Returns the raw pcre_exec error code.
366 | func (m *Matcher) Exec(subject []byte, flags int) int {
367 | 	if m.re.ptr == nil {
368 | 		panic("Matcher.Exec: uninitialized")
369 | 	}
370 | 	length := len(subject)
371 | 	m.subjects = ""
372 | 	m.subjectb = subject
373 | 	if length == 0 {
374 | 		subject = nullbyte // make first character adressable
375 | 	}
376 | 	subjectptr := (*C.char)(unsafe.Pointer(&subject[0]))
377 | 	return m.exec(subjectptr, length, flags)
378 | }
379 | 
380 | // ExecString tries to match the specified subject string to
381 | // the current pattern. It returns the raw pcre_exec error code.
382 | func (m *Matcher) ExecString(subject string, flags int) int {
383 | 	if m.re.ptr == nil {
384 | 		panic("Matcher.ExecString: uninitialized")
385 | 	}
386 | 	length := len(subject)
387 | 	m.subjects = subject
388 | 	m.subjectb = nil
389 | 	if length == 0 {
390 | 		subject = "\000" // make first character addressable
391 | 	}
392 | 	// The following is a non-portable kludge to avoid a copy
393 | 	subjectptr := *(**C.char)(unsafe.Pointer(&subject))
394 | 	return m.exec(subjectptr, length, flags)
395 | }
396 | 
397 | func (m *Matcher) exec(subjectptr *C.char, length, flags int) int {
398 | 	var extra *C.pcre_extra
399 | 	if m.re.extra != nil {
400 | 		extra = (*C.pcre_extra)(unsafe.Pointer(&m.re.extra[0]))
401 | 	}
402 | 	rc := C.pcre_exec((*C.pcre)(unsafe.Pointer(&m.re.ptr[0])), extra,
403 | 		subjectptr, C.int(length),
404 | 		0, C.int(flags), &m.ovector[0], C.int(len(m.ovector)))
405 | 	return int(rc)
406 | }
407 | 
408 | // matched checks the return code of a pattern match for success.
409 | func matched(rc int) bool {
410 | 	switch {
411 | 	case rc >= 0 || rc == C.PCRE_ERROR_PARTIAL:
412 | 		return true
413 | 	case rc == C.PCRE_ERROR_NOMATCH:
414 | 		return false
415 | 	case rc == C.PCRE_ERROR_BADOPTION:
416 | 		panic("PCRE.Match: invalid option flag")
417 | 	}
418 | 	panic("unexpected return code from pcre_exec: " + strconv.Itoa(rc))
419 | }
420 | 
421 | // Matches returns true if a previous call to Matcher, MatcherString, Reset,
422 | // ResetString, Match or MatchString succeeded.
423 | func (m *Matcher) Matches() bool {
424 | 	return m.matches
425 | }
426 | 
427 | // Partial returns true if a previous call to Matcher, MatcherString, Reset,
428 | // ResetString, Match or MatchString found a partial match.
429 | func (m *Matcher) Partial() bool {
430 | 	return m.partial
431 | }
432 | 
433 | // Groups returns the number of groups in the current pattern.
434 | func (m *Matcher) Groups() int {
435 | 	return m.groups
436 | }
437 | 
438 | // Present returns true if the numbered capture group is present in the last
439 | // match (performed by Matcher, MatcherString, Reset, ResetString,
440 | // Match, or MatchString).  Group numbers start at 1.  A capture group
441 | // can be present and match the empty string.
442 | func (m *Matcher) Present(group int) bool {
443 | 	return m.ovector[2*group] >= 0
444 | }
445 | 
446 | // Group returns the numbered capture group of the last match (performed by
447 | // Matcher, MatcherString, Reset, ResetString, Match, or MatchString).
448 | // Group 0 is the part of the subject which matches the whole pattern;
449 | // the first actual capture group is numbered 1.  Capture groups which
450 | // are not present return a nil slice.
451 | func (m *Matcher) Group(group int) []byte {
452 | 	start := m.ovector[2*group]
453 | 	end := m.ovector[2*group+1]
454 | 	if start >= 0 {
455 | 		if m.subjectb != nil {
456 | 			return m.subjectb[start:end]
457 | 		}
458 | 		return []byte(m.subjects[start:end])
459 | 	}
460 | 	return nil
461 | }
462 | 
463 | // Extract returns a slice of byte slices for a single match.
464 | // The first byte slice contains the complete match.
465 | // Subsequent byte slices contain the captured groups.
466 | // If there was no match then nil is returned.
467 | func (m *Matcher) Extract() [][]byte {
468 | 	if !m.matches {
469 | 		return nil
470 | 	}
471 | 	extract := make([][]byte, m.groups+1)
472 | 	extract[0] = m.subjectb
473 | 	for i := 1; i <= m.groups; i++ {
474 | 		x0 := m.ovector[2*i]
475 | 		x1 := m.ovector[2*i+1]
476 | 		extract[i] = m.subjectb[x0:x1]
477 | 	}
478 | 	return extract
479 | }
480 | 
481 | // ExtractString returns a slice of strings for a single match.
482 | // The first string contains the complete match.
483 | // Subsequent strings in the slice contain the captured groups.
484 | // If there was no match then nil is returned.
485 | func (m *Matcher) ExtractString() []string {
486 | 	if !m.matches {
487 | 		return nil
488 | 	}
489 | 	extract := make([]string, m.groups+1)
490 | 	extract[0] = m.subjects
491 | 	for i := 1; i <= m.groups; i++ {
492 | 		x0 := m.ovector[2*i]
493 | 		x1 := m.ovector[2*i+1]
494 | 		extract[i] = m.subjects[x0:x1]
495 | 	}
496 | 	return extract
497 | }
498 | 
499 | // GroupIndices returns the numbered capture group positions of the last
500 | // match (performed by Matcher, MatcherString, Reset, ResetString, Match,
501 | // or MatchString). Group 0 is the part of the subject which matches
502 | // the whole pattern; the first actual capture group is numbered 1.
503 | // Capture groups which are not present return a nil slice.
504 | func (m *Matcher) GroupIndices(group int) []int {
505 | 	start := m.ovector[2*group]
506 | 	end := m.ovector[2*group+1]
507 | 	if start >= 0 {
508 | 		return []int{int(start), int(end)}
509 | 	}
510 | 	return nil
511 | }
512 | 
513 | // GroupString returns the numbered capture group as a string.  Group 0
514 | // is the part of the subject which matches the whole pattern; the first
515 | // actual capture group is numbered 1.  Capture groups which are not
516 | // present return an empty string.
517 | func (m *Matcher) GroupString(group int) string {
518 | 	start := m.ovector[2*group]
519 | 	end := m.ovector[2*group+1]
520 | 	if start >= 0 {
521 | 		if m.subjectb != nil {
522 | 			return string(m.subjectb[start:end])
523 | 		}
524 | 		return m.subjects[start:end]
525 | 	}
526 | 	return ""
527 | }
528 | 
529 | // Index returns the start and end of the first match, if a previous
530 | // call to Matcher, MatcherString, Reset, ResetString, Match or
531 | // MatchString succeeded. loc[0] is the start and loc[1] is the end.
532 | func (m *Matcher) Index() (loc []int) {
533 | 	if !m.matches {
534 | 		return nil
535 | 	}
536 | 	loc = []int{int(m.ovector[0]), int(m.ovector[1])}
537 | 	return
538 | }
539 | 
540 | // name2index converts a group name to its group index number.
541 | func (m *Matcher) name2index(name string) (int, error) {
542 | 	if m.re.ptr == nil {
543 | 		return 0, fmt.Errorf("Matcher.Named: uninitialized")
544 | 	}
545 | 	name1 := C.CString(name)
546 | 	defer C.free(unsafe.Pointer(name1))
547 | 	group := int(C.pcre_get_stringnumber(
548 | 		(*C.pcre)(unsafe.Pointer(&m.re.ptr[0])), name1))
549 | 	if group < 0 {
550 | 		return group, fmt.Errorf("Matcher.Named: unknown name: " + name)
551 | 	}
552 | 	return group, nil
553 | }
554 | 
555 | // Named returns the value of the named capture group.
556 | // This is a nil slice if the capture group is not present.
557 | // If the name does not refer to a group then error is non-nil.
558 | func (m *Matcher) Named(group string) ([]byte, error) {
559 | 	groupNum, err := m.name2index(group)
560 | 	if err != nil {
561 | 		return []byte{}, err
562 | 	}
563 | 	return m.Group(groupNum), nil
564 | }
565 | 
566 | // NamedString returns the value of the named capture group,
567 | // or an empty string if the capture group is not present.
568 | // If the name does not refer to a group then error is non-nil.
569 | func (m *Matcher) NamedString(group string) (string, error) {
570 | 	groupNum, err := m.name2index(group)
571 | 	if err != nil {
572 | 		return "", err
573 | 	}
574 | 	return m.GroupString(groupNum), nil
575 | }
576 | 
577 | // NamedPresent returns true if the named capture group is present.
578 | // If the name does not refer to a group then error is non-nil.
579 | func (m *Matcher) NamedPresent(group string) (bool, error) {
580 | 	groupNum, err := m.name2index(group)
581 | 	if err != nil {
582 | 		return false, err
583 | 	}
584 | 	return m.Present(groupNum), nil
585 | }
586 | 
587 | // FindIndex returns the start and end of the first match,
588 | // or nil if no match.  loc[0] is the start and loc[1] is the end.
589 | func (re *Regexp) FindIndex(bytes []byte, flags int) (loc []int) {
590 | 	m := re.Matcher(bytes, flags)
591 | 	if m.Matches() {
592 | 		loc = []int{int(m.ovector[0]), int(m.ovector[1])}
593 | 		return
594 | 	}
595 | 	return nil
596 | }
597 | 
598 | // ReplaceAll returns a copy of a byte slice
599 | // where all pattern matches are replaced by repl.
600 | func (re Regexp) ReplaceAll(bytes, repl []byte, flags int) []byte {
601 | 	m := re.Matcher(bytes, flags)
602 | 	r := []byte{}
603 | 	for m.matches {
604 | 		r = append(append(r, bytes[:m.ovector[0]]...), repl...)
605 | 		bytes = bytes[m.ovector[1]:]
606 | 		m.Match(bytes, flags)
607 | 	}
608 | 	return append(r, bytes...)
609 | }
610 | 
611 | // ReplaceAllString is equivalent to ReplaceAll with string return type.
612 | func (re Regexp) ReplaceAllString(in, repl string, flags int) string {
613 | 	return string(re.ReplaceAll([]byte(in), []byte(repl), flags))
614 | }
615 | 
// CompileError describes why a pattern could not be compiled.  It is
// the error type returned by the Compile function.  Offset is the byte
// position in the pattern string at which the error was detected.
type CompileError struct {
	Pattern string // The failed pattern
	Message string // The error message
	Offset  int    // Byte position of error
}

// Error renders the compile error as "<pattern> (<offset>): <message>".
func (e *CompileError) Error() string {
	buf := make([]byte, 0, len(e.Pattern)+len(e.Message)+24)
	buf = append(buf, e.Pattern...)
	buf = append(buf, " ("...)
	buf = strconv.AppendInt(buf, int64(e.Offset), 10)
	buf = append(buf, "): "...)
	buf = append(buf, e.Message...)
	return string(buf)
}
630 | 


--------------------------------------------------------------------------------
/pcre_fallback.h:
--------------------------------------------------------------------------------
/*
 * Fallback definition: when PCRE_NEVER_UTF is not already defined,
 * define it as 0x0 so that code referring to the macro (pcre.go exports
 * it as NEVER_UTF) still compiles.
 */
#ifndef PCRE_NEVER_UTF
#define PCRE_NEVER_UTF 0x0
#endif
4 | 


--------------------------------------------------------------------------------
/pcre_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2011 Florian Weimer <fw@deneb.enyo.de>
  2 | 
  3 | package pcre
  4 | 
  5 | import (
  6 | 	"testing"
  7 | )
  8 | 
  9 | func TestCompile(t *testing.T) {
 10 | 	var check = func(p string, groups int) {
 11 | 		re, err := Compile(p, 0)
 12 | 		if err != nil {
 13 | 			t.Error(p, err)
 14 | 		}
 15 | 		if g := re.Groups(); g != groups {
 16 | 			t.Error(p, g)
 17 | 		}
 18 | 	}
 19 | 	check("", 0)
 20 | 	check("^", 0)
 21 | 	check("^$", 0)
 22 | 	check("()", 1)
 23 | 	check("(())", 2)
 24 | 	check("((?:))", 1)
 25 | }
 26 | 
 27 | func TestCompileFail(t *testing.T) {
 28 | 	var check = func(p, msg string, off int) {
 29 | 		_, err := Compile(p, 0)
 30 | 		if err == nil {
 31 | 			t.Error(p)
 32 | 		} else {
 33 | 			cerr := err.(*CompileError)
 34 | 			switch {
 35 | 			case cerr.Message != msg:
 36 | 				t.Error(p, "Message", cerr.Message)
 37 | 			case cerr.Offset != off:
 38 | 				t.Error(p, "Offset", cerr.Offset)
 39 | 			}
 40 | 		}
 41 | 	}
 42 | 	check("(", "missing )", 1)
 43 | 	check("\\", "\\ at end of pattern", 1)
 44 | 	check("abc\\", "\\ at end of pattern", 4)
 45 | 	check("abc\000", "NUL byte in pattern", 3)
 46 | 	check("a\000bc", "NUL byte in pattern", 1)
 47 | }
 48 | 
 49 | func strings(b [][]byte) (r []string) {
 50 | 	r = make([]string, len(b))
 51 | 	for i, v := range b {
 52 | 		r[i] = string(v)
 53 | 	}
 54 | 	return
 55 | }
 56 | 
 57 | func equal(l, r []string) bool {
 58 | 	if len(l) != len(r) {
 59 | 		return false
 60 | 	}
 61 | 	for i, lv := range l {
 62 | 		if lv != r[i] {
 63 | 			return false
 64 | 		}
 65 | 	}
 66 | 	return true
 67 | }
 68 | 
 69 | func checkmatch1(t *testing.T, dostring bool, m *Matcher,
 70 | 	pattern, subject string, args ...interface{}) {
 71 | 	re := MustCompile(pattern, 0)
 72 | 	var prefix string
 73 | 	if dostring {
 74 | 		if m == nil {
 75 | 			m = re.MatcherString(subject, 0)
 76 | 		} else {
 77 | 			m.ResetString(re, subject, 0)
 78 | 		}
 79 | 		prefix = "string"
 80 | 	} else {
 81 | 		if m == nil {
 82 | 			m = re.Matcher([]byte(subject), 0)
 83 | 		} else {
 84 | 			m.Reset(re, []byte(subject), 0)
 85 | 		}
 86 | 		prefix = "[]byte"
 87 | 	}
 88 | 	if len(args) == 0 {
 89 | 		if m.Matches() {
 90 | 			t.Error(prefix, pattern, subject, "!Matches")
 91 | 		}
 92 | 	} else {
 93 | 		if !m.Matches() {
 94 | 			t.Error(prefix, pattern, subject, "Matches")
 95 | 			return
 96 | 		}
 97 | 		if m.Groups() != len(args)-1 {
 98 | 			t.Error(prefix, pattern, subject, "Groups", m.Groups())
 99 | 			return
100 | 		}
101 | 		for i, arg := range args {
102 | 			if s, ok := arg.(string); ok {
103 | 				if !m.Present(i) {
104 | 					t.Error(prefix, pattern, subject,
105 | 						"Present", i)
106 | 
107 | 				}
108 | 				if g := string(m.Group(i)); g != s {
109 | 					t.Error(prefix, pattern, subject,
110 | 						"Group", i, g, "!=", s)
111 | 				}
112 | 				if g := m.GroupString(i); g != s {
113 | 					t.Error(prefix, pattern, subject,
114 | 						"GroupString", i, g, "!=", s)
115 | 				}
116 | 			} else {
117 | 				if m.Present(i) {
118 | 					t.Error(prefix, pattern, subject,
119 | 						"!Present", i)
120 | 				}
121 | 			}
122 | 		}
123 | 	}
124 | }
125 | 
126 | func TestMatcher(t *testing.T) {
127 | 	var m Matcher
128 | 	check := func(pattern, subject string, args ...interface{}) {
129 | 		checkmatch1(t, false, nil, pattern, subject, args...)
130 | 		checkmatch1(t, true, nil, pattern, subject, args...)
131 | 		checkmatch1(t, false, &m, pattern, subject, args...)
132 | 		checkmatch1(t, true, &m, pattern, subject, args...)
133 | 	}
134 | 
135 | 	check(`^$`, "", "")
136 | 	check(`^abc$`, "abc", "abc")
137 | 	check(`^(X)*ab(c)$`, "abc", "abc", nil, "c")
138 | 	check(`^(X)*ab()c$`, "abc", "abc", nil, "")
139 | 	check(`^.*$`, "abc", "abc")
140 | 	check(`^.*$`, "a\000c", "a\000c")
141 | 	check(`^(.*)$`, "a\000c", "a\000c", "a\000c")
142 | 	check(`def`, "abcdefghi", "def")
143 | }
144 | 
145 | func TestPartial(t *testing.T) {
146 | 	re := MustCompile(`^abc`, 0)
147 | 
148 | 	// Check we get a partial match when we should
149 | 	m := re.MatcherString("ab", PARTIAL_SOFT)
150 | 	if !m.Matches() {
151 | 		t.Error("Failed to find any matches")
152 | 	} else if !m.Partial() {
153 | 		t.Error("The match was not partial")
154 | 	}
155 | 
156 | 	// Check we get an exact match when we should
157 | 	m = re.MatcherString("abc", PARTIAL_SOFT)
158 | 	if !m.Matches() {
159 | 		t.Error("Failed to find any matches")
160 | 	} else if m.Partial() {
161 | 		t.Error("Match was partial but should have been exact")
162 | 	}
163 | 
164 | 	m = re.Matcher([]byte("ab"), PARTIAL_SOFT)
165 | 	if !m.Matches() {
166 | 		t.Error("Failed to find any matches")
167 | 	} else if !m.Partial() {
168 | 		t.Error("The match was not partial")
169 | 	}
170 | 
171 | 	m = re.Matcher([]byte("abc"), PARTIAL_SOFT)
172 | 	if !m.Matches() {
173 | 		t.Error("Failed to find any matches")
174 | 	} else if m.Partial() {
175 | 		t.Error("Match was partial but should have been exact")
176 | 	}
177 | }
178 | 
179 | func TestCaseless(t *testing.T) {
180 | 	m := MustCompile("abc", CASELESS).MatcherString("...Abc...", 0)
181 | 	if !m.Matches() {
182 | 		t.Error("CASELESS")
183 | 	}
184 | 	m = MustCompile("abc", 0).MatcherString("Abc", 0)
185 | 	if m.Matches() {
186 | 		t.Error("!CASELESS")
187 | 	}
188 | }
189 | 
190 | func TestNamed(t *testing.T) {
191 | 	pattern := "(?<L>a)(?<M>X)*bc(?<DIGITS>\\d*)"
192 | 	m := MustCompile(pattern, 0).MatcherString("abc12", 0)
193 | 	if !m.Matches() {
194 | 		t.Error("Matches")
195 | 	}
196 | 	if ok, err := m.NamedPresent("L"); !ok || err != nil {
197 | 		t.Errorf("NamedPresent(\"L\"): %v", err)
198 | 	}
199 | 	if ok, err := m.NamedPresent("M"); ok || err != nil {
200 | 		t.Errorf("NamedPresent(\"M\"): %v", err)
201 | 	}
202 | 	if ok, err := m.NamedPresent("DIGITS"); !ok || err != nil {
203 | 		t.Errorf("NamedPresent(\"DIGITS\"): %v", err)
204 | 	}
205 | 	if str, err := m.NamedString("DIGITS"); str != "12" || err != nil {
206 | 		t.Errorf("NamedString(\"DIGITS\"): %v", err)
207 | 	}
208 | }
209 | 
210 | func TestMatcherIndex(t *testing.T) {
211 | 	m := MustCompile("bcd", 0).Matcher([]byte("abcdef"), 0)
212 | 	i := m.Index()
213 | 	if i[0] != 1 {
214 | 		t.Error("FindIndex start", i[0])
215 | 	}
216 | 	if i[1] != 4 {
217 | 		t.Error("FindIndex end", i[1])
218 | 	}
219 | 
220 | 	m = MustCompile("xyz", 0).Matcher([]byte("abcdef"), 0)
221 | 	i = m.Index()
222 | 	if i != nil {
223 | 		t.Error("Index returned for non-match", i)
224 | 	}
225 | }
226 | 
227 | func TestFindIndex(t *testing.T) {
228 | 	re := MustCompile("bcd", 0)
229 | 	i := re.FindIndex([]byte("abcdef"), 0)
230 | 	if i[0] != 1 {
231 | 		t.Error("FindIndex start", i[0])
232 | 	}
233 | 	if i[1] != 4 {
234 | 		t.Error("FindIndex end", i[1])
235 | 	}
236 | }
237 | 
238 | func TestExtract(t *testing.T) {
239 | 	re := MustCompile("b(c)(d)", 0)
240 | 	m := re.MatcherString("abcdef", 0)
241 | 	i := m.ExtractString()
242 | 	if i[0] != "abcdef" {
243 | 		t.Error("Full line unavailable: ", i[0])
244 | 	}
245 | 	if i[1] != "c" {
246 | 		t.Error("First match group no as expected: ", i[1])
247 | 	}
248 | 	if i[2] != "d" {
249 | 		t.Error("Second match group no as expected: ", i[2])
250 | 	}
251 | }
252 | 
253 | func TestReplaceAll(t *testing.T) {
254 | 	re := MustCompile("foo", 0)
255 | 	// Don't change at ends.
256 | 	result := re.ReplaceAll([]byte("I like foods."), []byte("car"), 0)
257 | 	if string(result) != "I like cards." {
258 | 		t.Error("ReplaceAll", result)
259 | 	}
260 | 	// Change at ends.
261 | 	result = re.ReplaceAll([]byte("food fight fools foo"), []byte("car"), 0)
262 | 	if string(result) != "card fight carls car" {
263 | 		t.Error("ReplaceAll2", result)
264 | 	}
265 | }
266 | 


--------------------------------------------------------------------------------