├── LICENSE ├── README.md ├── bytes.go ├── bytes_test.go ├── go.mod ├── regexer.go ├── runes.go ├── runes_test.go ├── shared.go ├── shared_runes.go ├── shared_test.go ├── string.go └── string_test.go /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | 2025 Gram 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included 13 | in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # regexer 2 | 3 | [ [📄 docs](https://pkg.go.dev/github.com/orsinium-labs/regexer) ] [ [🐙 github](https://github.com/orsinium-labs/regexer) ] 4 | 5 | Go package with more powerful, flexible, and safe API for regular expressions. The main idea is to use the Go 1.24+ iterators to make finding/replacing submatches flexible, low-memory, and stoppable. 
6 |
7 | Features:
8 |
9 | * Type-safe.
10 | * Lazy iteration.
11 | * Supports strings, bytes, and runes as input.
12 | * The same generic API for all inputs.
13 | * Everything possible with the standard library's regexp: find matches, find submatches, replace, replace with a template.
14 | * And much more, like the ability to replace only one or several matches.
15 |
16 | ## Installation
17 |
18 | ```bash
19 | go get github.com/orsinium-labs/regexer
20 | ```
21 |
22 | ## Usage
23 |
24 | Find and print all words in the text and their positions.
25 |
26 | ```go
27 | rex := regexer.New(`\w+`)
28 | input := "never gonna give you up"
29 | matches := rex.String(input).Find()
30 | for match := range matches {
31 | 	fmt.Println(match.Span.Start, match.Content)
32 | }
33 | ```
34 |
35 | The same but for a slice of bytes:
36 |
37 | ```go
38 | rex := regexer.New(`\w+`)
39 | input := []byte("never gonna give you up")
40 | matches := rex.Bytes(input).Find()
41 | for match := range matches {
42 | 	fmt.Println(match.Span.Start, string(match.Content))
43 | }
44 | ```
45 |
46 | In both cases, `matches` is a lazy iterator. It doesn't need to allocate memory for all matches up front, and if you stop iterating, it stops scanning the input.
47 |
48 | Replacing has a very similar API:
49 |
50 | ```go
51 | rex := regexer.New(`([a-z]+)`)
52 | input := "number 42 is the answer"
53 | var result string
54 | matches := rex.String(input).Replace(&result)
55 | for match := range matches {
56 | 	template := string(`[$1]`)
57 | 	match.ReplaceTemplate(template)
58 | }
59 | fmt.Println(result)
60 | // Output: [number] 42 [is] [the] [answer]
61 | ```
62 |
63 | Accessing submatches:
64 |
65 | ```go
66 | rex := regexer.New(`([a-z.]+)@([a-z.]+)`)
67 | input := "my email is mail@example.com, text me"
68 | matches := rex.String(input).Find()
69 | for match := range matches {
70 | 	username := match.Subs.At(1).Content
71 | 	domain := match.Subs.At(2).Content
72 | 	fmt.Printf("username: %s; domain: %s", username, domain)
73 | }
74 | ```
75 |
--------------------------------------------------------------------------------
/bytes.go:
--------------------------------------------------------------------------------
1 | package regexer
2 |
3 | import (
4 | 	"iter"
5 | 	"regexp"
6 | 	"unicode/utf8"
7 | )
8 |
9 | type (
10 | 	BMatch = Match[[]byte]
11 | 	BSub   = Sub[[]byte]
12 | 	BSubs  = Subs[[]byte]
13 | )
14 |
15 | // Bytes is a compiled regular expression bound to the byte slice it searches in.
16 | type Bytes struct {
17 | 	rex *regexp.Regexp
18 | 	src []byte
19 | }
20 |
21 | // Find returns a lazy iterator over the successive non-overlapping matches in the input.
22 | //
23 | // As with the "All" functions of regexp, an empty match right after the previous match is skipped.
24 | //
25 | // NOTE(review): every search runs on the unscanned tail of the input, so `^`, `\b`
26 | // and similar assertions treat the start of that tail as the start of the text.
27 | func (b Bytes) Find() iter.Seq[BMatch] {
28 | 	return func(yield func(BMatch) bool) {
29 | 		// prevEnd is the absolute end of the previous match, -1 before the first one.
30 | 		shift, prevEnd := 0, -1
31 | 		for shift <= len(b.src) {
32 | 			subSrc := b.src[shift:]
33 | 			spans := b.rex.FindSubmatchIndex(subSrc)
34 | 			if spans == nil {
35 | 				return
36 | 			}
37 | 			spanStart, spanEnd := spans[0], spans[1]
38 | 			match := BMatch{
39 | 				Content: subSrc[spanStart:spanEnd],
40 | 				Span: Span{
41 | 					Start: shift + spanStart,
42 | 					End:   shift + spanEnd,
43 | 				},
44 | 				Subs: BSubs{
45 | 					shift:    shift,
46 | 					content:  subSrc,
47 | 					rawSpans: spans,
48 | 				},
49 | 			}
50 | 			isEmpty := spanStart == spanEnd
51 | 			if !(isEmpty && match.Span.Start == prevEnd) && !yield(match) {
52 | 				return
53 | 			}
54 | 			prevEnd = match.Span.End
55 | 			shift = prevEnd
56 | 			if isEmpty {
57 | 				// An empty match consumes nothing: step over one rune so that
58 | 				// the scan always makes progress instead of looping forever.
59 | 				_, width := utf8.DecodeRune(subSrc[spanEnd:])
60 | 				shift += max(width, 1)
61 | 			}
62 | 		}
63 | 	}
64 | }
65 |
66 | // Replace returns a lazy iterator over the matches that appends the rewritten input to res.
67 | //
68 | // Text between matches is copied to res unchanged. What replaces a match is whatever
69 | // the caller appends through the Replace* methods of the yielded value; if none of them
70 | // is called, the match is dropped. When the iteration is stopped early, the rest of
71 | // the input is copied to res as is.
72 | func (b Bytes) Replace(res *[]byte) iter.Seq[BReplacement] {
73 | 	return func(yield func(BReplacement) bool) {
74 | 		prevEnd := 0
75 | 		for match := range b.Find() {
76 | 			*res = append(*res, b.src[prevEnd:match.Span.Start]...)
77 | 			// The submatch spans are relative to the slice Find searched in,
78 | 			// so that slice is the source for template expansion.
79 | 			ok := yield(BReplacement{
80 | 				Match:  match,
81 | 				rex:    b.rex,
82 | 				src:    match.Subs.content,
83 | 				result: res,
84 | 			})
85 | 			prevEnd = match.Span.End
86 | 			if !ok {
87 | 				break
88 | 			}
89 | 		}
90 | 		*res = append(*res, b.src[prevEnd:]...)
91 | 	}
92 | }
93 |
94 | // Contains reports whether the input contains any match of the regexp.
95 | func (b Bytes) Contains() bool {
96 | 	return b.rex.Match(b.src)
97 | }
98 |
99 | // BReplacement is a match yielded by [Bytes.Replace] together with the means to replace it.
100 | type BReplacement struct {
101 | 	Match[[]byte]
102 | 	rex    *regexp.Regexp
103 | 	src    []byte
104 | 	result *[]byte
105 | }
106 |
107 | // ReplaceLiteral appends val to the result as is.
108 | func (r BReplacement) ReplaceLiteral(val []byte) {
109 | 	*r.result = append(*r.result, val...)
110 | }
111 |
112 | // ReplaceTemplate appends the template val to the result, expanding `$1`-style
113 | // variables in it with the submatches of the current match (see [regexp.Regexp.Expand]).
114 | func (r BReplacement) ReplaceTemplate(val []byte) {
115 | 	*r.result = r.rex.Expand(*r.result, val, r.src, r.Subs.rawSpans)
116 | }
117 |
118 | // ReplaceFunc appends to the result what f returns for the content of the match.
119 | func (r BReplacement) ReplaceFunc(f func([]byte) []byte) {
120 | 	*r.result = append(*r.result, f(r.Match.Content)...)
121 | }
122 |
--------------------------------------------------------------------------------
/bytes_test.go:
--------------------------------------------------------------------------------
1 | package regexer_test
2 |
3 | import (
4 | 	"bytes"
5 | 	"fmt"
6 |
7 | 	"github.com/orsinium-labs/regexer"
8 | )
9 |
10 | func ExampleBytes_Find() {
11 | 	rex := regexer.New(`[a-z]+`)
12 | 	input := []byte("never gonna give you up")
13 | 	matches := rex.Bytes(input).Find()
14 | 	for match := range matches {
15 | 		fmt.Println(match.Span.Start, string(match.Content))
16 | 	}
17 | 	//Output:
18 | 	// 0 never
19 | 	// 6 gonna
20 | 	// 12 give
21 | 	// 17 you
22 | 	// 21 up
23 | }
24 |
25 | func ExampleBytes_Contains() {
26 | 	rex := regexer.New(`[0-9]+`)
27 | 	input := []byte("number 42 is the answer")
28 | 	contains := rex.Bytes(input).Contains()
29 | 	if contains {
30 | 		fmt.Println("the byte slice contains a regexp match")
31 | 	}
32 | 	//Output: the byte slice contains a regexp match
33 | }
34 |
35 | func ExampleBytes_Replace() {
36 | 	rex := regexer.New(`(is|the)`)
37 | 	input := []byte("number 42 is the answer")
38 | var result []byte 39 | matches := rex.Bytes(input).Replace(&result) 40 | for match := range matches { 41 | newVal := bytes.ToUpper(match.Content) 42 | match.ReplaceLiteral(newVal) 43 | } 44 | fmt.Println(string(result)) 45 | //Output: number 42 IS THE answer 46 | } 47 | 48 | func ExampleBReplacement_ReplaceLiteral() { 49 | rex := regexer.New(`(is|the)`) 50 | input := []byte("number 42 is the answer") 51 | var result []byte 52 | matches := rex.Bytes(input).Replace(&result) 53 | for match := range matches { 54 | newVal := bytes.ToUpper(match.Content) 55 | match.ReplaceLiteral(newVal) 56 | } 57 | fmt.Println(string(result)) 58 | //Output: number 42 IS THE answer 59 | } 60 | 61 | func ExampleBReplacement_ReplaceTemplate() { 62 | rex := regexer.New(`(is|the)`) 63 | input := []byte("number 42 is the answer") 64 | var result []byte 65 | matches := rex.Bytes(input).Replace(&result) 66 | for match := range matches { 67 | template := []byte(`[$1]`) 68 | match.ReplaceTemplate(template) 69 | } 70 | fmt.Println(string(result)) 71 | //Output: number 42 [is] [the] answer 72 | } 73 | 74 | func ExampleBReplacement_ReplaceFunc() { 75 | rex := regexer.New(`[a-z]+`) 76 | input := []byte("number 42 is the answer") 77 | var result []byte 78 | matches := rex.Bytes(input).Replace(&result) 79 | for match := range matches { 80 | match.ReplaceFunc(func(b []byte) []byte { 81 | return append(bytes.ToUpper(b[:1]), b[1:]...) 
82 | 		})
83 | 	}
84 | 	fmt.Println(string(result))
85 | 	//Output: Number 42 Is The Answer
86 | }
87 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/orsinium-labs/regexer
2 |
3 | go 1.24.0
4 |
--------------------------------------------------------------------------------
/regexer.go:
--------------------------------------------------------------------------------
1 | package regexer
2 |
3 | import "regexp"
4 |
5 | type Regex struct {
6 | 	rex *regexp.Regexp
7 | }
8 |
9 | type stringLiteral string
10 |
11 | func New(raw stringLiteral) Regex {
12 | 	return Regex{
13 | 		rex: regexp.MustCompile(string(raw)),
14 | 	}
15 | }
16 |
17 | func (r Regex) Bytes(src []byte) Bytes {
18 | 	return Bytes{rex: r.rex, src: src}
19 | }
20 |
21 | func (r Regex) String(src string) String {
22 | 	return String{rex: r.rex, src: src}
23 | }
24 |
25 | func (r Regex) Runes(src []rune) Runes {
26 | 	return Runes{rex: r.rex, src: src}
27 | }
28 |
--------------------------------------------------------------------------------
/runes.go:
--------------------------------------------------------------------------------
1 | package regexer
2 |
3 | import (
4 | 	"bytes"
5 | 	"io"
6 | 	"iter"
7 | 	"regexp"
8 | 	"unicode/utf8"
9 | )
10 |
11 | // Runes is a compiled regular expression bound to the rune slice it searches in.
12 | type Runes struct {
13 | 	rex *regexp.Regexp
14 | 	src []rune
15 | }
16 |
17 | // runeReader feeds a rune slice to regexp through the io.RuneReader interface.
18 | type runeReader struct {
19 | 	inner    []rune
20 | 	consumed int
21 | }
22 |
23 | // ReadRune returns the next rune, or io.EOF once the slice is exhausted.
24 | //
25 | // The size is always reported as 1 instead of the UTF-8 length of the rune:
26 | // regexp computes match offsets by adding up the sizes, and this makes them
27 | // indices into the rune slice, which is how the rest of the file uses them.
28 | func (rr *runeReader) ReadRune() (rune, int, error) {
29 | 	if rr.consumed >= len(rr.inner) {
30 | 		return 0, 0, io.EOF
31 | 	}
32 | 	r := rr.inner[rr.consumed]
33 | 	rr.consumed += 1
34 | 	return r, 1, nil
35 | }
36 |
37 | // Find returns a lazy iterator over the successive non-overlapping matches in the input.
38 | //
39 | // As with the "All" functions of regexp, an empty match right after the previous match is skipped.
40 | //
41 | // NOTE(review): every search runs on the unscanned tail of the input, so `^`, `\b`
42 | // and similar assertions treat the start of that tail as the start of the text.
43 | func (b Runes) Find() iter.Seq[RMatch] {
44 | 	return func(yield func(RMatch) bool) {
45 | 		// prevEnd is the absolute end of the previous match, -1 before the first one.
46 | 		shift, prevEnd := 0, -1
47 | 		for shift <= len(b.src) {
48 | 			subSrc := b.src[shift:]
49 | 			reader := runeReader{inner: subSrc}
50 | 			spans := b.rex.FindReaderSubmatchIndex(&reader)
51 | 			if spans == nil {
52 | 				return
53 | 			}
54 | 			spanStart, spanEnd := spans[0], spans[1]
55 | 			match := RMatch{
56 | 				Content: subSrc[spanStart:spanEnd],
57 | 				Span: Span{
58 | 					Start: shift + spanStart,
59 | 					End:   shift + spanEnd,
60 | 				},
61 | 				// The raw spans index into subSrc, so that is what Subs must slice.
62 | 				Subs: RSubs{
63 | 					shift:    shift,
64 | 					content:  subSrc,
65 | 					rawSpans: spans,
66 | 				},
67 | 			}
68 | 			isEmpty := spanStart == spanEnd
69 | 			if !(isEmpty && match.Span.Start == prevEnd) && !yield(match) {
70 | 				return
71 | 			}
72 | 			prevEnd = match.Span.End
73 | 			shift = prevEnd
74 | 			if isEmpty {
75 | 				// An empty match consumes nothing: step over one rune so that
76 | 				// the scan always makes progress instead of looping forever.
77 | 				shift++
78 | 			}
79 | 		}
80 | 	}
81 | }
82 |
83 | // Replace returns a lazy iterator over the matches that appends the rewritten input to res.
84 | //
85 | // Text between matches is copied to res unchanged. What replaces a match is whatever
86 | // the caller appends through the Replace* methods of the yielded value; if none of them
87 | // is called, the match is dropped. When the iteration is stopped early, the rest of
88 | // the input is copied to res as is.
89 | func (b Runes) Replace(res *[]rune) iter.Seq[RReplacement] {
90 | 	return func(yield func(RReplacement) bool) {
91 | 		prevEnd := 0
92 | 		for match := range b.Find() {
93 | 			*res = append(*res, b.src[prevEnd:match.Span.Start]...)
94 | 			// The submatch spans are relative to the slice Find searched in,
95 | 			// so that slice is the source for template expansion.
96 | 			ok := yield(RReplacement{
97 | 				RMatch: match,
98 | 				rex:    b.rex,
99 | 				src:    match.Subs.content,
100 | 				result: res,
101 | 			})
102 | 			prevEnd = match.Span.End
103 | 			if !ok {
104 | 				break
105 | 			}
106 | 		}
107 | 		*res = append(*res, b.src[prevEnd:]...)
108 | 	}
109 | }
110 |
111 | // Contains reports whether the input contains any match of the regexp.
112 | func (b Runes) Contains() bool {
113 | 	reader := runeReader{inner: b.src}
114 | 	return b.rex.MatchReader(&reader)
115 | }
116 |
117 | // RReplacement is a match yielded by [Runes.Replace] together with the means to replace it.
118 | type RReplacement struct {
119 | 	RMatch
120 | 	rex    *regexp.Regexp
121 | 	src    []rune
122 | 	result *[]rune
123 | }
124 |
125 | // ReplaceLiteral appends val to the result as is.
126 | func (r RReplacement) ReplaceLiteral(val []rune) {
127 | 	*r.result = append(*r.result, val...)
128 | }
129 |
130 | // ReplaceTemplate appends the template val to the result, expanding `$1`-style
131 | // variables in it with the submatches of the current match (see [regexp.Regexp.Expand]).
132 | func (r RReplacement) ReplaceTemplate(val []rune) {
133 | 	spans := r.Subs.rawSpans
134 | 	start, end := spans[0], spans[1]
135 | 	// Expand needs bytes and byte offsets. All submatches lie inside the full
136 | 	// match, so only the matched runes are encoded; offsets[i] is the byte
137 | 	// offset of the rune at index start+i.
138 | 	src := make([]byte, 0, end-start)
139 | 	offsets := make([]int, 0, end-start+1)
140 | 	for _, c := range r.src[start:end] {
141 | 		offsets = append(offsets, len(src))
142 | 		src = utf8.AppendRune(src, c)
143 | 	}
144 | 	offsets = append(offsets, len(src))
145 | 	byteSpans := make([]int, len(spans))
146 | 	for i, span := range spans {
147 | 		byteSpans[i] = -1 // the group did not participate in the match
148 | 		if span >= 0 {
149 | 			byteSpans[i] = offsets[span-start]
150 | 		}
151 | 	}
152 | 	suffix := r.rex.Expand(nil, runes2bytes(val), src, byteSpans)
153 | 	*r.result = append(*r.result, bytes.Runes(suffix)...)
154 | }
155 |
156 | // ReplaceFunc appends to the result what f returns for the content of the match.
157 | func (r RReplacement) ReplaceFunc(f func([]rune) []rune) {
158 | 	*r.result = append(*r.result, f(r.RMatch.Content)...)
159 | }
160 |
161 | // runes2bytes returns the UTF-8 encoding of runes.
162 | func runes2bytes(runes []rune) []byte {
163 | 	res := make([]byte, 0, len(runes))
164 | 	for _, r := range runes {
165 | 		res = utf8.AppendRune(res, r)
166 | 	}
167 | 	return res
168 | }
169 |
--------------------------------------------------------------------------------
/runes_test.go:
--------------------------------------------------------------------------------
1 | package regexer_test
2 |
3 | import (
4 | 	"fmt"
5 | 	"unicode"
6 |
7 | 	"github.com/orsinium-labs/regexer"
8 | )
9 |
10 | func ExampleRunes_Find() {
11 | 	rex := regexer.New(`[a-z]+`)
12 | 	input := []rune("never gonna give you up")
13 | 	matches := rex.Runes(input).Find()
14 | 	for match := range matches {
15 | 		fmt.Println(match.Span.Start, string(match.Content))
16 | 	}
17 | 	//Output:
18 | 	// 0 never
19 | 	// 6 gonna
20 | 	// 12 give
21 | 	// 17 you
22 | 	// 21 up
23 | }
24 |
25 | func ExampleRunes_Contains() {
26 | 	rex := regexer.New(`[0-9]+`)
27 | 	input := []rune("number 42 is the answer")
28 | 	contains := rex.Runes(input).Contains()
29 | 	if contains {
30 | 		fmt.Println("the rune slice contains a regexp match")
31 | 	}
32 | 	//Output: the rune slice contains a regexp match
33 | }
34 |
35 | func ExampleRunes_Replace() {
36 | 	rex := regexer.New(`\w+`)
37 | 	input := []rune("number 42 is the answer")
38 | 	var result []rune
39 | 	matches := rex.Runes(input).Replace(&result)
40 | 	for match := range matches {
41 | 		first := unicode.ToUpper(match.Content[0])
42 | 		newVal := append([]rune{first}, match.Content[1:]...)
43 | match.ReplaceLiteral(newVal) 44 | } 45 | fmt.Println(string(result)) 46 | //Output: Number 42 Is The Answer 47 | } 48 | 49 | func ExampleRReplacement_ReplaceLiteral() { 50 | rex := regexer.New(`\w+`) 51 | input := []rune("number 42 is the answer") 52 | var result []rune 53 | matches := rex.Runes(input).Replace(&result) 54 | for match := range matches { 55 | first := unicode.ToUpper(match.Content[0]) 56 | newVal := append([]rune{first}, match.Content[1:]...) 57 | match.ReplaceLiteral(newVal) 58 | } 59 | fmt.Println(string(result)) 60 | //Output: Number 42 Is The Answer 61 | } 62 | 63 | func ExampleRReplacement_ReplaceTemplate() { 64 | rex := regexer.New(`(is|the)`) 65 | input := []rune("number 42 is the answer") 66 | var result []rune 67 | matches := rex.Runes(input).Replace(&result) 68 | for match := range matches { 69 | template := []rune(`[$1]`) 70 | match.ReplaceTemplate(template) 71 | } 72 | fmt.Println(string(result)) 73 | //Output: number 42 [is] [the] answer 74 | } 75 | 76 | func ExampleRReplacement_ReplaceFunc() { 77 | rex := regexer.New(`[a-z]+`) 78 | input := []rune("number 42 is the answer") 79 | var result []rune 80 | matches := rex.Runes(input).Replace(&result) 81 | for match := range matches { 82 | match.ReplaceFunc(func(b []rune) []rune { 83 | first := unicode.ToUpper(b[0]) 84 | return append([]rune{first}, b[1:]...) 
85 | 		})
86 | 	}
87 | 	fmt.Println(string(result))
88 | 	//Output: Number 42 Is The Answer
89 | }
90 |
--------------------------------------------------------------------------------
/shared.go:
--------------------------------------------------------------------------------
1 | package regexer
2 |
3 | import "iter"
4 |
5 | type text interface {
6 | 	~string | ~[]byte
7 | }
8 |
9 | type rText interface {
10 | 	~string | ~[]byte | ~[]rune
11 | }
12 |
13 | // Span is the half-open range [Start, End) of positions that a match occupies.
14 | type Span struct {
15 | 	Start int
16 | 	End   int
17 | }
18 |
19 | // Len returns the number of positions the span covers.
20 | func (s Span) Len() int {
21 | 	return s.End - s.Start
22 | }
23 |
24 | // Match is a single match of the regexp in the input.
25 | type Match[T text] struct {
26 | 	// The full match text.
27 | 	Content T
28 | 	// The range of the match in the original text.
29 | 	Span Span
30 | 	// Matches for sub-patterns.
31 | 	Subs Subs[T]
32 | }
33 |
34 | // Matches for sub-patterns.
35 | //
36 | // Index 0 is the full match, index i is the i-th capture group.
37 | type Subs[T text] struct {
38 | 	content  T
39 | 	shift    int
40 | 	rawSpans []int
41 | }
42 |
43 | // Len returns the number of capture groups, not counting the full match.
44 | func (s Subs[T]) Len() int {
45 | 	return len(s.rawSpans)/2 - 1
46 | }
47 |
48 | // At returns the i-th submatch (0 is the full match).
49 | //
50 | // For a group that did not take part in the match, the result has
51 | // empty Content and a Span of {-1, -1}.
52 | func (s Subs[T]) At(i int) Sub[T] {
53 | 	start, end := s.rawSpans[i*2], s.rawSpans[i*2+1]
54 | 	if start < 0 || end < 0 {
55 | 		// regexp reports unmatched groups with negative indices.
56 | 		return Sub[T]{Span: Span{Start: -1, End: -1}}
57 | 	}
58 | 	return Sub[T]{
59 | 		Content: s.content[start:end],
60 | 		Span: Span{
61 | 			Start: s.shift + start,
62 | 			End:   s.shift + end,
63 | 		},
64 | 	}
65 | }
66 |
67 | // Slice returns all submatches, with the same indices as [Subs.At].
68 | func (s Subs[T]) Slice() []Sub[T] {
69 | 	n := len(s.rawSpans) / 2
70 | 	subs := make([]Sub[T], 0, n)
71 | 	for i := range n {
72 | 		subs = append(subs, s.At(i))
73 | 	}
74 | 	return subs
75 | }
76 |
77 | // Iter returns an iterator over all submatches, in the same order as [Subs.At].
78 | func (s Subs[T]) Iter() iter.Seq[Sub[T]] {
79 | 	return func(yield func(Sub[T]) bool) {
80 | 		for i := range len(s.rawSpans) / 2 {
81 | 			if !yield(s.At(i)) {
82 | 				return
83 | 			}
84 | 		}
85 | 	}
86 | }
87 |
88 | // Sub is a single submatch: its text and its range in the original text.
89 | type Sub[T rText] struct {
90 | 	Content T
91 | 	Span    Span
92 | }
93 |
--------------------------------------------------------------------------------
/shared_runes.go:
--------------------------------------------------------------------------------
1 | package regexer
2 |
3 | import "iter"
4 |
5 | // The same as [Match] but for runes.
6 | //
7 | // Because the compiler can't infer the core type of [text] if we extend it
8 | // with a slice of runes.
9 | type RMatch struct {
10 | 	// The full match text.
11 | 	Content []rune
12 | 	// The range of the match in the original text.
13 | 	Span Span
14 | 	// Matches for sub-patterns.
15 | 	Subs RSubs
16 | }
17 |
18 | // Matches for sub-patterns.
19 | //
20 | // Index 0 is the full match, index i is the i-th capture group.
21 | type RSubs struct {
22 | 	content  []rune
23 | 	shift    int
24 | 	rawSpans []int
25 | }
26 |
27 | // Len returns the number of capture groups, not counting the full match.
28 | func (s RSubs) Len() int {
29 | 	return len(s.rawSpans)/2 - 1
30 | }
31 |
32 | // At returns the i-th submatch (0 is the full match).
33 | //
34 | // For a group that did not take part in the match, the result has
35 | // empty Content and a Span of {-1, -1}.
36 | func (s RSubs) At(i int) RSub {
37 | 	start, end := s.rawSpans[i*2], s.rawSpans[i*2+1]
38 | 	if start < 0 || end < 0 {
39 | 		// regexp reports unmatched groups with negative indices.
40 | 		return RSub{Span: Span{Start: -1, End: -1}}
41 | 	}
42 | 	return RSub{
43 | 		Content: s.content[start:end],
44 | 		Span: Span{
45 | 			Start: s.shift + start,
46 | 			End:   s.shift + end,
47 | 		},
48 | 	}
49 | }
50 |
51 | // Slice returns all submatches, with the same indices as [RSubs.At].
52 | func (s RSubs) Slice() []RSub {
53 | 	n := len(s.rawSpans) / 2
54 | 	subs := make([]RSub, 0, n)
55 | 	for i := range n {
56 | 		subs = append(subs, s.At(i))
57 | 	}
58 | 	return subs
59 | }
60 |
61 | // Iter returns an iterator over all submatches, in the same order as [RSubs.At].
62 | func (s RSubs) Iter() iter.Seq[RSub] {
63 | 	return func(yield func(RSub) bool) {
64 | 		for i := range len(s.rawSpans) / 2 {
65 | 			if !yield(s.At(i)) {
66 | 				return
67 | 			}
68 | 		}
69 | 	}
70 | }
71 |
72 | type RSub = Sub[[]rune]
73 |
--------------------------------------------------------------------------------
/shared_test.go:
--------------------------------------------------------------------------------
1 | package regexer_test
2 |
3 | import (
4 | 	"fmt"
5 |
6 | 	"github.com/orsinium-labs/regexer"
7 | )
8 |
9 | func ExampleSubs_At() {
10 | 	rex := regexer.New(`([a-z.]+)@([a-z.]+)`)
11 | 	input := "my email is mail@example.com, text me"
12 | 	matches := rex.String(input).Find()
13 | 	for match := range matches {
14 | 		username := match.Subs.At(1).Content
15 | 		domain := match.Subs.At(2).Content
16 | 		fmt.Printf("username: %s; domain: %s", username, domain)
17 | 	}
18 | 	//Output: username: mail; domain: example.com
19 | }
20 |
21 | func ExampleSubs_Slice() {
22 | 	rex := regexer.New(`([a-z.]+)@([a-z.]+)`)
23 | 	input := "my email is mail@example.com, text me"
24 | 	matches := rex.String(input).Find()
25 | 	for match := range matches {
26 | 		subs := match.Subs.Slice()
27 | 		username := subs[1].Content
28 | 		domain := subs[2].Content
29 | 		fmt.Printf("username: %s; domain: %s", username, domain)
30 | 	}
31 | 	//Output: username: mail; domain: example.com
32 | }
33 |
34 | func ExampleSubs_Iter() {
35 | 	rex := regexer.New(`([a-z.]+)@([a-z.]+)`)
36 | 	input := "my email is mail@example.com, text me"
37 | 	matches := rex.String(input).Find()
38 | 	for match := range matches {
39 | 		for sub := range match.Subs.Iter() {
40 | 			fmt.Println(sub.Content)
41 | 		}
42 | 	}
43 | 	//Output:
44 | 	// mail@example.com
45 | 	// mail
46 | 	// example.com
47 | }
48 |
--------------------------------------------------------------------------------
/string.go:
--------------------------------------------------------------------------------
1 | package regexer
2 |
3 | import (
4 | 	"iter"
5 | 	"regexp"
6 | 	"strings"
7 | )
8 |
9 | type (
10 | 	SMatch =
Match[string]
11 | 	SSub   = Sub[string]
12 | 	SSubs  = Subs[string]
13 | )
14 |
15 | // String is a compiled regular expression bound to the string it searches in.
16 | type String struct {
17 | 	rex *regexp.Regexp
18 | 	src string
19 | }
20 |
21 | // Find returns a lazy iterator over the successive non-overlapping matches in the input.
22 | //
23 | // As with the "All" functions of regexp, an empty match right after the previous match is skipped.
24 | //
25 | // NOTE(review): every search runs on the unscanned tail of the input, so `^`, `\b`
26 | // and similar assertions treat the start of that tail as the start of the text.
27 | func (s String) Find() iter.Seq[SMatch] {
28 | 	return func(yield func(SMatch) bool) {
29 | 		// prevEnd is the absolute end of the previous match, -1 before the first one.
30 | 		shift, prevEnd := 0, -1
31 | 		for shift <= len(s.src) {
32 | 			subSrc := s.src[shift:]
33 | 			spans := s.rex.FindStringSubmatchIndex(subSrc)
34 | 			if spans == nil {
35 | 				return
36 | 			}
37 | 			spanStart, spanEnd := spans[0], spans[1]
38 | 			match := SMatch{
39 | 				Content: subSrc[spanStart:spanEnd],
40 | 				Span: Span{
41 | 					Start: shift + spanStart,
42 | 					End:   shift + spanEnd,
43 | 				},
44 | 				Subs: SSubs{
45 | 					shift:    shift,
46 | 					content:  subSrc,
47 | 					rawSpans: spans,
48 | 				},
49 | 			}
50 | 			isEmpty := spanStart == spanEnd
51 | 			if !(isEmpty && match.Span.Start == prevEnd) && !yield(match) {
52 | 				return
53 | 			}
54 | 			prevEnd = match.Span.End
55 | 			shift = prevEnd
56 | 			if isEmpty {
57 | 				// An empty match consumes nothing: step over one rune so that
58 | 				// the scan always makes progress instead of looping forever.
59 | 				shift += firstRuneLen(subSrc[spanEnd:])
60 | 			}
61 | 		}
62 | 	}
63 | }
64 |
65 | // firstRuneLen returns the size in bytes of the first rune of s.
66 | // For an empty s it returns 1, so that advancing by the result always makes progress.
67 | func firstRuneLen(s string) int {
68 | 	for i := range s {
69 | 		if i > 0 {
70 | 			return i // the second rune starts where the first one ends
71 | 		}
72 | 	}
73 | 	return max(len(s), 1) // s holds at most one rune
74 | }
75 |
76 | // Replace returns a lazy iterator over the matches that stores the rewritten input in res.
77 | //
78 | // Text between matches is kept unchanged. What replaces a match is whatever the caller
79 | // writes through the Replace* methods of the yielded value; if none of them is called,
80 | // the match is dropped. When the iteration is stopped early, the rest of the input is
81 | // kept as is. res is assigned once, when the iteration ends.
82 | func (s String) Replace(res *string) iter.Seq[SReplacement] {
83 | 	return func(yield func(SReplacement) bool) {
84 | 		prevEnd := 0
85 | 		resBuilder := strings.Builder{}
86 | 		for match := range s.Find() {
87 | 			resBuilder.WriteString(s.src[prevEnd:match.Span.Start])
88 | 			// The submatch spans are relative to the string Find searched in,
89 | 			// so that string is the source for template expansion.
90 | 			ok := yield(SReplacement{
91 | 				Match:  match,
92 | 				rex:    s.rex,
93 | 				src:    match.Subs.content,
94 | 				result: &resBuilder,
95 | 			})
96 | 			prevEnd = match.Span.End
97 | 			if !ok {
98 | 				break
99 | 			}
100 | 		}
101 | 		resBuilder.WriteString(s.src[prevEnd:])
102 | 		*res = resBuilder.String()
103 | 	}
104 | }
105 |
106 | // Contains reports whether the input contains any match of the regexp.
107 | func (s String) Contains() bool {
108 | 	return s.rex.MatchString(s.src)
109 | }
110 |
111 | // SReplacement is a match yielded by [String.Replace] together with the means to replace it.
112 | type SReplacement struct {
113 | 	Match[string]
114 | 	rex    *regexp.Regexp
115 | 	src    string
116 | 	result *strings.Builder
117 | }
118 |
119 | // ReplaceLiteral writes val to the result as is.
120 | func (r SReplacement) ReplaceLiteral(val string) {
121 | 	r.result.WriteString(val)
122 | }
123 |
124 | // ReplaceTemplate writes the template val to the result, expanding `$1`-style
125 | // variables in it with the submatches of the current match (see [regexp.Regexp.ExpandString]).
126 | func (r SReplacement) ReplaceTemplate(val string) {
127 | 	suffix := r.rex.ExpandString(nil, val, r.src, r.Subs.rawSpans)
128 | 	// Builder.Write copies the bytes itself, so no intermediate string is allocated.
129 | 	r.result.Write(suffix)
130 | }
131 |
132 | // ReplaceFunc writes to the result what f returns for the content of the match.
133 | func (r SReplacement) ReplaceFunc(f func(string) string) {
134 | 	r.result.WriteString(f(r.Match.Content))
135 | }
136 |
--------------------------------------------------------------------------------
/string_test.go:
--------------------------------------------------------------------------------
1 | package regexer_test
2 |
3 | import (
4 | 	"fmt"
5 | 	"strings"
6 |
7 | 	"github.com/orsinium-labs/regexer"
8 | )
9 |
10 | func ExampleString_Find() {
11 | 	rex := regexer.New(`[a-z]+`)
12 | 	input := "never gonna give you up"
13 | 	matches := rex.String(input).Find()
14 | 	for match := range matches {
15 | 		fmt.Println(match.Span.Start, match.Content)
16 | 	}
17 | 	//Output:
18 | 	// 0 never
19 | 	// 6 gonna
20 | 	// 12 give
21 | 	// 17 you
22 | 	// 21 up
23 | }
24 |
25 | func ExampleString_Contains() {
26 | 	rex := regexer.New(`[0-9]+`)
27 | 	input := "number 42 is the answer"
28 | 	contains := rex.String(input).Contains()
29 | 	if contains {
30 | 		fmt.Println("the string contains a regexp match")
31 | 	}
32 | 	//Output: the string contains a regexp match
33 | }
34 |
35 | func ExampleString_Replace() {
36 | 	rex := regexer.New(`(is|the)`)
37 | 	input := "number 42 is the answer"
38 | 	var result string
39 | 	matches := rex.String(input).Replace(&result)
40 | 	for match := range matches {
41 | 		newVal := strings.ToUpper(match.Content)
42 | 		match.ReplaceLiteral(newVal)
43 | 	}
44 | 	fmt.Println(result)
45 | 	//Output: number 42 IS THE answer
46 | }
47 |
48 | func ExampleSReplacement_ReplaceLiteral() {
49 | 	rex := regexer.New(`(is|the)`)
50 | 	input := "number 42 is the answer"
51 | 	var result string
52 | 	matches := rex.String(input).Replace(&result)
53 | 	for match := range matches {
54 | 		newVal := strings.ToUpper(match.Content)
55 | 		match.ReplaceLiteral(newVal)
56 | 	}
57 | 	fmt.Println(string(result))
58 | 	//Output: number 42 IS THE answer
59 | }
60 |
61 | func ExampleSReplacement_ReplaceTemplate() {
62 | 	rex := regexer.New(`(is|the)`)
63 | 
input := "number 42 is the answer" 64 | var result string 65 | matches := rex.String(input).Replace(&result) 66 | for match := range matches { 67 | template := string(`[$1]`) 68 | match.ReplaceTemplate(template) 69 | } 70 | fmt.Println(string(result)) 71 | //Output: number 42 [is] [the] answer 72 | } 73 | --------------------------------------------------------------------------------