├── .gitignore
├── README.markdown
├── debian
    ├── .gitignore
    ├── changelog
    ├── compat
    ├── control
    ├── copyright
    └── rules
└── src
    └── pkg
        └── pcre
            ├── Makefile
            ├── pcre.go
            └── pcre_test.go


/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | [0-9].out
3 | *.[0-9]
4 | *.cgo?.*
5 | _cgo_*
6 | _obj
7 | _test
8 | _testmain.go
9 | 


--------------------------------------------------------------------------------
/README.markdown:
--------------------------------------------------------------------------------
 1 | golang-pkg-pcre
 2 | ===============
 3 | 
 4 | This is a Go language package providing Perl-Compatible Regular Expression
 5 | support using libpcre++.  Install the package with the current Debian
 6 | distribution as follows:
 7 | 
 8 |     sudo apt-get install libpcre++-dev
 9 |     go get github.com/glenn-brown/golang-pkg-pcre/src/pkg/pcre
10 | 
11 | Go programs that depend on this package should import this package as
12 | follows to allow automatic downloading:
13 | 
14 |     import "github.com/glenn-brown/golang-pkg-pcre/src/pkg/pcre"
15 | 
16 | This is a clone of
17 | [golang-pkg-pcre](http://git.enyo.de/fw/debian/golang-pkg-pcre.git)
18 | by Florian Weimer, which has been placed on github so it can be fetched by
19 | Go's automatic package installer.  The `FindIndex()` and `ReplaceAll()`
20 | functions were added by Glenn Brown, to mimic functions in Go's default
21 | regexp package.
22 | 


--------------------------------------------------------------------------------
/debian/.gitignore:
--------------------------------------------------------------------------------
1 | *.debhelper
2 | *.log
3 | *.substvars
4 | /files
5 | /golang-pkg-pcre
6 | 


--------------------------------------------------------------------------------
/debian/changelog:
--------------------------------------------------------------------------------
1 | golang-pkg-pcre (1) unstable; urgency=low
2 | 
3 |   * Initial version
4 | 
5 |  -- Florian Weimer <fw@deneb.enyo.de>  Sat, 26 Feb 2011 20:39:33 +0100
6 | 
7 | 


--------------------------------------------------------------------------------
/debian/compat:
--------------------------------------------------------------------------------
1 | 8
2 | 


--------------------------------------------------------------------------------
/debian/control:
--------------------------------------------------------------------------------
 1 | Source: golang-pkg-pcre
 2 | Maintainer: Florian Weimer <fw@deneb.enyo.de>
 3 | Section: devel
 4 | Priority: optional
 5 | Build-Depends: debhelper (>= 8),
 6 |  golang-go,
 7 |  libpcre3-dev
 8 | 
 9 | Package: golang-pkg-pcre
10 | Architecture: any
11 | Depends: golang-go
12 | Description: Perl 5 Compatible Regular Expression Library (Go interface)
13 | 


--------------------------------------------------------------------------------
/debian/copyright:
--------------------------------------------------------------------------------
 1 | The Go interface for PCRE is licensed according to the terms below.
 2 | Note that PCRE and the Go implementation come with different terms.
 3 | 
 4 | Copyright (c) 2011 Florian Weimer. All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are
 8 | met:
 9 | 
10 | * Redistributions of source code must retain the above copyright
11 |   notice, this list of conditions and the following disclaimer.
12 | 
13 | * Redistributions in binary form must reproduce the above copyright
14 |   notice, this list of conditions and the following disclaimer in the
15 |   documentation and/or other materials provided with the distribution.
16 | 
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 


--------------------------------------------------------------------------------
/debian/rules:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/make -f
 2 | %:
 3 | 	dh $@
 4 | 
 5 | override_dh_auto_build:
 6 | 	gomake -C src/pkg/pcre
 7 | 
 8 | override_dh_auto_install:
 9 | 	gomake -C src/pkg/pcre DESTDIR=$(CURDIR)/debian/golang-pkg-pcre \
10 | 	  install-debian
11 | 	install -D src/pkg/pcre/pcre.go \
12 | 	  debian/golang-pkg-pcre/usr/lib/golang/src/pkg/pcre/pcre.go
13 | 


--------------------------------------------------------------------------------
/src/pkg/pcre/Makefile:
--------------------------------------------------------------------------------
 1 | include $(GOROOT)/src/Make.inc
 2 | 
 3 | TARG=pcre
 4 | 
 5 | CGOFILES=\
 6 | 	pcre.go
 7 | 
 8 | include $(GOROOT)/src/Make.pkg
 9 | 
10 | .PHONY: install-debian
11 | install-debian:
12 | 	install -D _obj/$(TARG).a $(DESTDIR)/$(pkgdir)/$(TARG).a 
13 | 


--------------------------------------------------------------------------------
/src/pkg/pcre/pcre.go:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2011 Florian Weimer. All rights reserved.
  2 | // 
  3 | // Redistribution and use in source and binary forms, with or without
  4 | // modification, are permitted provided that the following conditions are
  5 | // met:
  6 | // 
  7 | // * Redistributions of source code must retain the above copyright
  8 | //   notice, this list of conditions and the following disclaimer.
  9 | // 
 10 | // * Redistributions in binary form must reproduce the above copyright
 11 | //   notice, this list of conditions and the following disclaimer in the
 12 | //   documentation and/or other materials provided with the distribution.
 13 | // 
 14 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 15 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 16 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 17 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 18 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 19 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 20 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 21 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 22 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 23 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 24 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 | 
 26 | // This package provides access to the Perl Compatible Regular
 27 | // Expression library, PCRE.
 28 | //
 29 | // It implements two main types, Regexp and Matcher.  Regexp objects
 30 | // store a compiled regular expression.  They are immutable.
 31 | // Compilation of regular expressions using Compile or MustCompile is
 32 | // slightly expensive, so these objects should be kept and reused,
 33 | // instead of compiling them from scratch for each matching attempt.
 34 | //
 35 | // Matcher objects keep the results of a match against a []byte or
 36 | // string subject.  The Group and GroupString functions provide access
 37 | // to capture groups; both versions work no matter if the subject was a
 38 | // []byte or string, but the version with the matching type is slightly
 39 | // more efficient.
 40 | //
 41 | // Matcher objects contain some temporary space and refer to the original
 42 | // subject.  They are mutable and can be reused (using Match,
 43 | // MatchString, Reset or ResetString).
 44 | //
 45 | // For details on the regular expression language implemented by this
 46 | // package and the flags defined below, see the PCRE documentation.
 47 | package pcre
 48 | 
 49 | /*
 50 | #cgo LDFLAGS: -lpcre
 51 | #cgo CFLAGS: -I/opt/local/include
 52 | #include <pcre.h>
 53 | #include <string.h>
 54 | */
 55 | import "C"
 56 | 
 57 | import (
 58 | 	"strconv"
 59 | 	"unsafe"
 60 | )
 61 | 
 62 | // Flags for Compile and Match functions.
 63 | const (
 64 | 	ANCHORED = C.PCRE_ANCHORED
 65 | 	BSR_ANYCRLF = C.PCRE_BSR_ANYCRLF
 66 | 	BSR_UNICODE = C.PCRE_BSR_UNICODE
 67 | 	NEWLINE_ANY = C.PCRE_NEWLINE_ANY
 68 | 	NEWLINE_ANYCRLF = C.PCRE_NEWLINE_ANYCRLF
 69 | 	NEWLINE_CR = C.PCRE_NEWLINE_CR
 70 | 	NEWLINE_CRLF = C.PCRE_NEWLINE_CRLF
 71 | 	NEWLINE_LF = C.PCRE_NEWLINE_LF
 72 | 	NO_UTF8_CHECK = C.PCRE_NO_UTF8_CHECK
 73 | )
 74 | 
 75 | // Flags for Compile functions
 76 | const (
 77 | 	CASELESS = C.PCRE_CASELESS
 78 | 	DOLLAR_ENDONLY = C.PCRE_DOLLAR_ENDONLY
 79 | 	DOTALL = C.PCRE_DOTALL
 80 | 	DUPNAMES = C.PCRE_DUPNAMES
 81 | 	EXTENDED = C.PCRE_EXTENDED
 82 | 	EXTRA = C.PCRE_EXTRA
 83 | 	FIRSTLINE = C.PCRE_FIRSTLINE
 84 | 	JAVASCRIPT_COMPAT = C.PCRE_JAVASCRIPT_COMPAT
 85 | 	MULTILINE = C.PCRE_MULTILINE
 86 | 	NO_AUTO_CAPTURE = C.PCRE_NO_AUTO_CAPTURE
 87 | 	UNGREEDY = C.PCRE_UNGREEDY
 88 | 	UTF8 = C.PCRE_UTF8
 89 | )
 90 | 
 91 | // Flags for Match functions
 92 | const (
 93 | 	NOTBOL = C.PCRE_NOTBOL 
 94 | 	NOTEOL = C.PCRE_NOTEOL
 95 | 	NOTEMPTY = C.PCRE_NOTEMPTY
 96 | 	NOTEMPTY_ATSTART = C.PCRE_NOTEMPTY_ATSTART
 97 | 	NO_START_OPTIMIZE = C.PCRE_NO_START_OPTIMIZE
 98 | 	PARTIAL_HARD = C.PCRE_PARTIAL_HARD
 99 | 	PARTIAL_SOFT = C.PCRE_PARTIAL_SOFT
100 | )
101 | 
// Regexp refers to a compiled regular expression.  Obtain one from
// Compile or MustCompile; on the zero value, Groups and the matcher
// constructors panic.
type Regexp struct {
	// ptr holds the compiled pattern, copied out of C memory by toheap.
	ptr []byte
}
107 | 
108 | // Number of bytes in the compiled pattern
109 | func pcresize(ptr *C.pcre) (size C.size_t) {
110 | 	C.pcre_fullinfo(ptr, nil, C.PCRE_INFO_SIZE, unsafe.Pointer(&size))
111 | 	return
112 | }
113 | 
114 | // Number of capture groups
115 | func pcregroups(ptr *C.pcre) (count C.int) {
116 | 	C.pcre_fullinfo(ptr, nil,
117 | 		C.PCRE_INFO_CAPTURECOUNT, unsafe.Pointer(&count))
118 | 	return
119 | }
120 | 
121 | // Move pattern to the Go heap so that we do not have to use a
122 | // finalizer.  PCRE patterns are fully relocatable. (We do not use
123 | // custom character tables.)
124 | func toheap(ptr *C.pcre) (re Regexp) {
125 | 	defer C.free(unsafe.Pointer(ptr))
126 | 	size := pcresize(ptr)
127 | 	re.ptr = make([]byte, size)
128 | 	C.memcpy(unsafe.Pointer(&re.ptr[0]), unsafe.Pointer(ptr), size)
129 | 	return
130 | }
131 | 
132 | // Try to compile the pattern.  If an error occurs, the second return
133 | // value is non-nil.
134 | func Compile(pattern string, flags int) (Regexp, *CompileError) {
135 | 	pattern1 := C.CString(pattern)
136 | 	defer C.free(unsafe.Pointer(pattern1))
137 | 	if clen := int(C.strlen(pattern1)); clen != len(pattern) {
138 | 		return Regexp{}, &CompileError{
139 | 			Pattern: pattern,
140 | 			Message: "NUL byte in pattern",
141 | 			Offset: clen,
142 | 		}
143 | 	}
144 | 	var errptr *C.char
145 | 	var erroffset C.int
146 | 	ptr := C.pcre_compile(pattern1, C.int(flags), &errptr, &erroffset, nil)
147 | 	if ptr == nil {
148 | 		return Regexp{}, &CompileError{
149 | 		        Pattern: pattern,
150 | 		        Message: C.GoString(errptr),
151 | 		        Offset: int(erroffset),
152 | 		}
153 | 	}
154 | 	return toheap(ptr), nil
155 | }
156 | 
157 | // Compile the pattern.  If compilation fails, panic.
158 | func MustCompile(pattern string, flags int) (re Regexp) {
159 | 	re, err := Compile(pattern, flags)
160 | 	if err != nil {
161 | 		panic(err)
162 | 	}
163 | 	return
164 | }
165 | 
166 | // Returns the number of capture groups in the compiled pattern.
167 | func (re Regexp) Groups() int {
168 | 	if re.ptr == nil {
169 | 		panic("Regexp.Groups: uninitialized")
170 | 	}
171 | 	return int(pcregroups((*C.pcre)(unsafe.Pointer(&re.ptr[0]))))
172 | }
173 | 
174 | // Matcher objects provide a place for storing match results.
175 | // They can be created by the Matcher and MatcherString functions,
176 | // or they can be initialized with Reset or ResetString.
177 | type Matcher struct {
178 | 	re Regexp
179 | 	groups int
180 | 	ovector []C.int		// scratch space for capture offsets
181 | 	matches bool		// last match was successful
182 | 	subjects string // one of these fields is set to record the subject,
183 | 	subjectb []byte // so that Group/GroupString can return slices
184 | }
185 | 
186 | // Returns a new matcher object, with the byte array slice as a
187 | // subject.
188 | func (re Regexp) Matcher(subject []byte, flags int) (m *Matcher) {
189 | 	m = new(Matcher)
190 | 	m.Reset(re, subject, flags)
191 | 	return
192 | }
193 | 
194 | // Returns a new matcher object, with the specified subject string.
195 | func (re Regexp) MatcherString(subject string, flags int) (m *Matcher) {
196 | 	m = new(Matcher)
197 | 	m.ResetString(re, subject, flags)
198 | 	return
199 | }
200 | 
201 | // Switches the matcher object to the specified pattern and subject.
202 | func (m *Matcher) Reset(re Regexp, subject []byte, flags int) {
203 | 	if re.ptr == nil {
204 | 		panic("Regexp.Matcher: uninitialized")
205 | 	}
206 | 	m.init(re)
207 | 	m.Match(subject, flags)
208 | }
209 | 
210 | // Switches the matcher object to the specified pattern and subject
211 | // string.
212 | func (m *Matcher) ResetString(re Regexp, subject string, flags int) {
213 | 	if re.ptr == nil {
214 | 		panic("Regexp.Matcher: uninitialized")
215 | 	}
216 | 	m.init(re)
217 | 	m.MatchString(subject, flags)
218 | }
219 | 
220 | func (m *Matcher) init(re Regexp) {
221 | 	m.matches = false
222 | 	if m.re.ptr != nil && &m.re.ptr[0] == &re.ptr[0] {
223 | 		// Skip group count extraction if the matcher has
224 | 		// already been initialized with the same regular
225 | 		// expression.
226 | 		return
227 | 	}
228 | 	m.re = re
229 | 	m.groups = re.Groups()
230 | 	if ovectorlen := 3 * (1 + m.groups); len(m.ovector) < ovectorlen {
231 | 		m.ovector = make([]C.int, ovectorlen)
232 | 	}
233 | }
234 | 
235 | var nullbyte = []byte{0}
236 | 
237 | // Tries to match the speficied byte array slice to the current
238 | // pattern.  Returns true if the match succeeds.
239 | func (m *Matcher) Match(subject []byte, flags int) bool {
240 | 	if m.re.ptr == nil {
241 | 		panic("Matcher.Match: uninitialized")
242 | 	}
243 | 	length := len(subject)
244 | 	m.subjects = ""
245 | 	m.subjectb = subject
246 | 	if length == 0 {
247 | 		subject = nullbyte // make first character adressable
248 | 	}
249 | 	subjectptr := (*C.char)(unsafe.Pointer(&subject[0]))
250 | 	return m.match(subjectptr, length, flags)
251 | }
252 | 
253 | // Tries to match the speficied subject string to the current pattern.
254 | // Returns true if the match succeeds.
255 | func (m *Matcher) MatchString(subject string, flags int) bool {
256 | 	if m.re.ptr == nil {
257 | 		panic("Matcher.Match: uninitialized")
258 | 	}
259 | 	length := len(subject)
260 | 	m.subjects = subject
261 | 	m.subjectb = nil
262 | 	if length == 0 {
263 | 		subject = "\000" // make first character addressable
264 | 	}
265 | 	// The following is a non-portable kludge to avoid a copy
266 | 	subjectptr := *(**C.char)(unsafe.Pointer(&subject))
267 | 	return m.match(subjectptr, length, flags)
268 | }
269 | 
270 | func (m *Matcher) match(subjectptr *C.char, length, flags int) bool {
271 | 	rc := C.pcre_exec((*C.pcre)(unsafe.Pointer(&m.re.ptr[0])), nil,
272 | 		subjectptr, C.int(length),
273 | 		0, C.int(flags), &m.ovector[0], C.int(len(m.ovector)))
274 | 	switch {
275 | 	case rc >= 0:
276 | 		m.matches = true
277 | 		return true
278 | 	case rc == C.PCRE_ERROR_NOMATCH:
279 | 		m.matches = false
280 | 		return false
281 | 	case rc == C.PCRE_ERROR_BADOPTION:
282 | 		panic("PCRE.Match: invalid option flag")
283 | 	}
284 | 	panic("unexepected return code from pcre_exec: " +
285 | 		strconv.Itoa(int(rc)))
286 | }
287 | 
288 | // Returns true if a previous call to Matcher, MatcherString, Reset,
289 | // ResetString, Match or MatchString succeeded.
290 | func (m *Matcher) Matches() bool {
291 | 	return m.matches
292 | }
293 | 
294 | // Returns the number of groups in the current pattern.
295 | func (m *Matcher) Groups() int {
296 | 	return m.groups
297 | }
298 | 
299 | // Returns true if the numbered capture group is present in the last
300 | // match (performed by Matcher, MatcherString, Reset, ResetString,
301 | // Match, or MatchString).  Group numbers start at 1.  A capture group
302 | // can be present and match the empty string.
303 | func (m *Matcher) Present(group int) bool {
304 | 	return m.ovector[2 * group] >= 0
305 | }
306 | 
307 | // Returns the numbered capture group of the last match (performed by
308 | // Matcher, MatcherString, Reset, ResetString, Match, or MatchString).
309 | // Group 0 is the part of the subject which matches the whole pattern;
310 | // the first actual capture group is numbered 1.  Capture groups which
311 | // are not present return a nil slice.
312 | func (m *Matcher) Group(group int) []byte {
313 | 	start := m.ovector[2 * group]
314 | 	end := m.ovector[2 * group + 1]
315 | 	if start >= 0 {
316 | 		if m.subjectb != nil {
317 | 			return m.subjectb[start:end]
318 | 		}
319 | 		return []byte(m.subjects[start:end])
320 | 	}
321 | 	return nil
322 | }
323 | 
324 | // Returns the numbered capture group as a string.  Group 0 is the
325 | // part of the subject which matches the whole pattern; the first
326 | // actual capture group is numbered 1.  Capture groups which are not
327 | // present return an empty string.
328 | func (m *Matcher) GroupString(group int) string {
329 | 	start := m.ovector[2 * group]
330 | 	end := m.ovector[2 * group + 1]
331 | 	if start >= 0 {
332 | 		if m.subjectb != nil {
333 | 			return string(m.subjectb[start:end])
334 | 		}
335 | 		return m.subjects[start:end]
336 | 	}
337 | 	return ""
338 | }
339 | 
340 | func (m *Matcher) name2index(name string) (group int) {
341 | 	if m.re.ptr == nil {
342 | 		panic("Matcher.Named: uninitialized")
343 | 	}
344 | 	name1 := C.CString(name)
345 | 	defer C.free(unsafe.Pointer(name1))
346 | 	group = int(C.pcre_get_stringnumber(
347 | 		(*C.pcre)(unsafe.Pointer(&m.re.ptr[0])), name1))
348 | 	if group < 0 {
349 | 		panic("Matcher.Named: unknown name: " + name)
350 | 	}
351 | 	return
352 | }
353 | 
354 | // Returns the value of the named capture group.  This is a nil slice
355 | // if the capture group is not present.  Panics if the name does not
356 | // refer to a group.
357 | func (m *Matcher) Named(group string) []byte {
358 | 	return m.Group(m.name2index(group))
359 | }
360 | 
361 | // Returns the value of the named capture group, or an empty string if
362 | // the capture group is not present.  Panics if the name does not
363 | // refer to a group.
364 | func (m *Matcher) NamedString(group string) string {
365 | 	return m.GroupString(m.name2index(group))
366 | }
367 | 
368 | // Returns true if the named capture group is present.  Panics if the
369 | // name does not refer to a group.
370 | func (m *Matcher) NamedPresent(group string) bool {
371 | 	return m.Present(m.name2index(group))
372 | }
373 | 
374 | // Return the start and end of the first match, or nil if no match.
375 | // loc[0] is the start and loc[1] is the end.
376 | func (re *Regexp) FindIndex(bytes []byte, flags int) []int {
377 | 	m := re.Matcher(bytes, flags)
378 | 	if m.Match(bytes, flags) {
379 | 		return []int{int(m.ovector[0]), int(m.ovector[1])}
380 | 	}
381 | 	return nil
382 | }
383 | 
384 | // Return a copy of a byte slice with pattern matches replaced by repl.
385 | func (re Regexp) ReplaceAll(bytes, repl []byte, flags int) []byte {
386 | 	m := re.Matcher(bytes, 0)
387 | 	r := []byte{}
388 | 	for m.Match(bytes, flags) {
389 | 		r = append (append (r, bytes[:m.ovector[0]]...), repl...)
390 | 		bytes = bytes[m.ovector[1]:]
391 | 	}
392 | 	return append (r, bytes...)
393 | }
394 | 
// CompileError describes a compilation error, as returned by the
// Compile function.  Offset is the byte position in the pattern
// string at which the error was detected.
//
// *CompileError implements the error interface.  Compile returns the
// concrete pointer type, so compare its result with nil before
// converting it to error; a nil *CompileError stored in an error
// value is not equal to nil.
type CompileError struct {
	Pattern string
	Message string
	Offset  int
}

// String formats the error as "<pattern> (<offset>): <message>".
func (e *CompileError) String() string {
	return e.Pattern + " (" + strconv.Itoa(e.Offset) + "): " + e.Message
}

// Error returns the same text as String, making *CompileError usable
// wherever an error is expected.
func (e *CompileError) Error() string {
	return e.String()
}
407 | 


--------------------------------------------------------------------------------
/src/pkg/pcre/pcre_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright (C) 2011 Florian Weimer <fw@deneb.enyo.de>
  2 | 
  3 | package pcre
  4 | 
  5 | import (
  6 | 	"testing"
  7 | )
  8 | 
  9 | func TestCompile(t *testing.T) {
 10 | 	var check = func (p string, groups int) {
 11 | 		re, err := Compile(p, 0)
 12 | 		if err != nil {
 13 | 			t.Error(p, err)
 14 | 		}
 15 | 		if g := re.Groups(); g != groups {
 16 | 			t.Error(p, g)
 17 | 		}
 18 | 	}
 19 | 	check("",0 )
 20 | 	check("^", 0)
 21 | 	check("^$", 0)
 22 | 	check("()", 1)
 23 | 	check("(())", 2)
 24 | 	check("((?:))", 1)
 25 | }
 26 | 
 27 | func TestCompileFail(t *testing.T) {
 28 | 	var check = func (p, msg string, off int) {
 29 | 		_, err := Compile(p, 0)
 30 | 		switch {
 31 | 		case err == nil:
 32 | 			t.Error(p)
 33 | 		case err.Message != msg:
 34 | 			t.Error(p, "Message", err.Message)
 35 | 		case err.Offset != off:
 36 | 			t.Error(p, "Offset", err.Offset)
 37 | 		}
 38 | 	}
 39 | 	check("(", "missing )", 1)
 40 | 	check("\\", "\\ at end of pattern", 1)
 41 | 	check("abc\\", "\\ at end of pattern", 4)
 42 | 	check("abc\000", "NUL byte in pattern", 3)
 43 | 	check("a\000bc", "NUL byte in pattern", 1)
 44 | }
 45 | 
 46 | func strings(b [][]byte) (r []string) {
 47 | 	r = make([]string, len(b))
 48 | 	for i, v := range b {
 49 | 		r[i] = string(v)
 50 | 	} 
 51 | 	return
 52 | }
 53 | 
 54 | func equal(l, r []string) bool {
 55 | 	if len(l) != len(r) {
 56 | 		return false
 57 | 	}
 58 | 	for i, lv := range l {
 59 | 		if lv != r[i] {
 60 | 			return false
 61 | 		}
 62 | 	}
 63 | 	return true
 64 | }
 65 | 
 66 | func checkmatch1(t *testing.T, dostring bool, m *Matcher,
 67 | 	pattern, subject string, args ...interface{}) {
 68 | 	re := MustCompile(pattern, 0)
 69 | 	var prefix string
 70 | 	if dostring {
 71 | 		if m == nil {
 72 | 			m = re.MatcherString(subject, 0)
 73 | 		} else {
 74 | 			m.ResetString(re, subject, 0)
 75 | 		}
 76 | 		prefix = "string"
 77 | 	} else {
 78 | 		if m == nil {
 79 | 			m = re.Matcher([]byte(subject), 0)
 80 | 		} else {
 81 | 			m.Reset(re, []byte(subject), 0)
 82 | 		}
 83 | 		prefix = "[]byte"
 84 | 	}
 85 | 	if len(args) == 0 {
 86 | 		if m.Matches() {
 87 | 			t.Error(prefix, pattern, subject, "!Matches")
 88 | 		}
 89 | 	} else {
 90 | 		if !m.Matches() {
 91 | 			t.Error(prefix, pattern, subject, "Matches")
 92 | 			return
 93 | 		}
 94 | 		if m.Groups() != len(args) - 1 {
 95 | 			t.Error(prefix, pattern, subject, "Groups", m.Groups())
 96 | 			return
 97 | 		}
 98 | 		for i, arg := range args {
 99 | 			if s, ok := arg.(string); ok {
100 | 				if !m.Present(i) {
101 | 					t.Error(prefix, pattern, subject,
102 | 						"Present", i)
103 | 
104 | 				}
105 | 				if g := string(m.Group(i)); g != s {
106 | 					t.Error(prefix, pattern, subject,
107 | 						"Group", i, g, "!=", s)
108 | 				}
109 | 				if g := m.GroupString(i); g != s {
110 | 					t.Error(prefix, pattern, subject,
111 | 						"GroupString", i, g, "!=", s)
112 | 				}
113 | 			} else {
114 | 				if m.Present(i) {
115 | 					t.Error(prefix, pattern, subject,
116 | 						"!Present", i)
117 | 				}
118 | 			}
119 | 		}
120 | 	}
121 | }
122 | 
123 | func TestMatcher(t *testing.T) {
124 | 	var m Matcher
125 | 	check := func(pattern, subject string, args ...interface{}) {
126 | 		checkmatch1(t, false, nil, pattern, subject, args...)
127 | 		checkmatch1(t, true, nil, pattern, subject, args...)
128 | 		checkmatch1(t, false, &m, pattern, subject, args...)
129 | 		checkmatch1(t, true, &m, pattern, subject, args...)
130 | 	}
131 | 
132 | 	check(`^$`, "", "")
133 | 	check(`^abc$`, "abc", "abc")
134 | 	check(`^(X)*ab(c)$`, "abc", "abc", nil, "c")
135 | 	check(`^(X)*ab()c$`, "abc", "abc", nil, "")
136 | 	check(`^.*$`, "abc", "abc")
137 | 	check(`^.*$`, "a\000c", "a\000c")
138 | 	check(`^(.*)$`, "a\000c", "a\000c", "a\000c")
139 | }
140 | 
141 | func TestCaseless(t *testing.T) {
142 | 	m := MustCompile("abc", CASELESS).MatcherString("Abc", 0)
143 | 	if !m.Matches() {
144 | 		t.Error("CASELESS")
145 | 	}
146 | 	m = MustCompile("abc", 0).MatcherString("Abc", 0)
147 | 	if m.Matches() {
148 | 		t.Error("!CASELESS")
149 | 	}
150 | }
151 | 
152 | func TestNamed(t *testing.T) {
153 | 	m := MustCompile("(?<L>a)(?<M>X)*bc(?<DIGITS>\\d*)", 0).
154 | 		MatcherString("abc12", 0)
155 | 	if !m.Matches() {
156 | 		t.Error("Matches")
157 | 	}
158 | 	if !m.NamedPresent("L") {
159 | 		t.Error("NamedPresent(\"L\")")
160 | 	}
161 | 	if m.NamedPresent("M") {
162 | 		t.Error("NamedPresent(\"M\")")
163 | 	}
164 | 	if !m.NamedPresent("DIGITS") {
165 | 		t.Error("NamedPresent(\"DIGITS\")")
166 | 	}
167 | 	if "12" != m.NamedString("DIGITS") {
168 | 		t.Error("NamedString(\"DIGITS\")")
169 | 	}
170 | }
171 | 
172 | func TestFindIndex(t *testing.T) {
173 | 	re := MustCompile("bcd", 0)
174 | 	i := re.FindIndex([]byte("abcdef"), 0)
175 | 	if i[0] != 1 {
176 | 		t.Error("FindIndex start", i[0])
177 | 	}
178 | 	if i[1] != 4 {
179 | 		t.Error("FindIndex end", i[1])
180 | 	}
181 | }
182 | 
183 | func TestReplaceAll(t *testing.T) {
184 | 	re := MustCompile("foo", 0)
185 | 	// Don't change at ends.
186 | 	result := re.ReplaceAll([]byte("I like foods."), []byte("car"), 0)
187 | 	if string(result) != "I like cards." {
188 | 		t.Error ("ReplaceAll", result)
189 | 	}
190 | 	// Change at ends.
191 | 	result = re.ReplaceAll([]byte("food fight fools foo"), []byte("car"), 0)
192 | 	if string(result) != "card fight carls car" {
193 | 		t.Error("ReplaceAll2", result)
194 | 	}
195 | }
196 | 


--------------------------------------------------------------------------------