├── LICENSE ├── common ├── cache.go ├── common.go └── typeConv.go ├── go.mod ├── go.sum ├── readme.md ├── regex.go ├── regex_test.go └── verbose └── verbose.go /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2022 aspiesoftweb@gmail.com 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /common/cache.go: -------------------------------------------------------------------------------- 1 | package common 2 | 3 | import ( 4 | "sync" 5 | "time" 6 | ) 7 | 8 | type CacheMap[T any] struct { 9 | value map[string]T 10 | err map[string]error 11 | lastUse map[string]time.Time 12 | mu sync.Mutex 13 | null T 14 | } 15 | 16 | func NewCache[T any]() CacheMap[T] { 17 | return CacheMap[T]{ 18 | value: map[string]T{}, 19 | err: map[string]error{}, 20 | lastUse: map[string]time.Time{}, 21 | } 22 | } 23 | 24 | // get returns a value or an error if it exists 25 | // 26 | // if the object key does not exist, it will return both a nil/zero value (of the relevant type) and nil error 27 | func (cache *CacheMap[T]) Get(key string) (T, error) { 28 | cache.mu.Lock() 29 | defer cache.mu.Unlock() 30 | 31 | if err, ok := cache.err[key]; ok { 32 | cache.lastUse[key] = time.Now() 33 | return cache.null, err 34 | }else if val, ok := cache.value[key]; ok { 35 | cache.lastUse[key] = time.Now() 36 | return val, nil 37 | } 38 | 39 | return cache.null, nil 40 | } 41 | 42 | // set sets or adds a new key with either a value, or an error 43 | func (cache *CacheMap[T]) Set(key string, value T, err error) { 44 | cache.mu.Lock() 45 | defer cache.mu.Unlock() 46 | 47 | if err != nil { 48 | cache.err[key] = err 49 | delete(cache.value, key) 50 | cache.lastUse[key] = time.Now() 51 | }else{ 52 | cache.value[key] = value 53 | delete(cache.err, key) 54 | cache.lastUse[key] = time.Now() 55 | } 56 | } 57 | 58 | // delOld removes old cache items 59 | func (cache *CacheMap[T]) DelOld(cacheTime time.Duration){ 60 | cache.mu.Lock() 61 | defer cache.mu.Unlock() 62 | 63 | if cacheTime == 0 { 64 | for key := range cache.lastUse { 65 | delete(cache.value, key) 66 | delete(cache.err, key) 67 | delete(cache.lastUse, key) 68 | } 69 | return 70 | } 71 | 72 | now := time.Now().UnixNano() 73 | 74 | for key, lastUse := range cache.lastUse { 75 | if now - lastUse.UnixNano() > int64(cacheTime) { 76 | delete(cache.value, key) 77 | delete(cache.err, key) 78 | delete(cache.lastUse, key) 79 | } 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /common/common.go: -------------------------------------------------------------------------------- 1 | package common 2 | 3 | import ( 4 | "math" 5 | "syscall" 6 | ) 7 | 8 | // JoinBytes is an easy way to join multiple values into a single []byte 9 | func JoinBytes(bytes ...interface{}) []byte { 10 | res := []byte{} 11 | for _, b := range bytes { 12 | res = append(res, ToString[[]byte](b)...) 13 | } 14 | return res 15 | } 16 | 17 | // SysFreeMemory returns the amount of memory available in megabytes 18 | func SysFreeMemory() float64 { 19 | in := &syscall.Sysinfo_t{} 20 | err := syscall.Sysinfo(in) 21 | if err != nil { 22 | return 0 23 | } 24 | 25 | // If this is a 32-bit system, then these fields are 26 | // uint32 instead of uint64. 27 | // So we always convert to uint64 to match signature. 28 | return math.Round(float64(uint64(in.Freeram) * uint64(in.Unit)) / 1024 / 1024 * 100) / 100 29 | } 30 | -------------------------------------------------------------------------------- /common/typeConv.go: -------------------------------------------------------------------------------- 1 | package common 2 | 3 | import ( 4 | "reflect" 5 | "strconv" 6 | ) 7 | 8 | var varType map[string]reflect.Type 9 | 10 | func init(){ 11 | varType = map[string]reflect.Type{} 12 | 13 | varType["[]interface{}"] = reflect.TypeOf([]interface{}{}) 14 | varType["array"] = varType["[]interface{}"] 15 | varType["[][]byte"] = reflect.TypeOf([][]byte{}) 16 | varType["map[string]interface{}"] = reflect.TypeOf(map[string]interface{}{}) 17 | varType["map"] = varType["map[string]interface{}"] 18 | 19 | varType["int"] = reflect.TypeOf(int(0)) 20 | varType["int64"] = reflect.TypeOf(int64(0)) 21 | varType["float64"] = reflect.TypeOf(float64(0)) 22 | varType["float32"] = reflect.TypeOf(float32(0)) 23 | 24 | varType["string"] = reflect.TypeOf("") 25 | varType["[]byte"] = reflect.TypeOf([]byte{}) 26 | varType["byteArray"] = varType["[]byte"] 27 | varType["byte"] = reflect.TypeOf([]byte{0}[0]) 28 | 29 | // ' ' returned int32 instead of byte 30 | varType["int32"] = reflect.TypeOf(int32(0)) 31 | varType["rune"] = reflect.TypeOf(rune(0)) 32 | 33 | varType["func"] = reflect.TypeOf(func(){}) 34 | 35 | varType["bool"] = reflect.TypeOf(false) 36 | 37 | varType["int8"] = reflect.TypeOf(int8(0)) 38 | varType["int16"] = reflect.TypeOf(int16(0)) 39 | 40 | varType["uint"] = reflect.TypeOf(uint(0)) 41 | varType["uint8"] = reflect.TypeOf(uint8(0)) 42 | varType["uint16"] = reflect.TypeOf(uint16(0)) 43 | varType["uint32"] = reflect.TypeOf(uint32(0)) 44 | varType["uint64"] = reflect.TypeOf(uint64(0)) 45 | varType["uintptr"] = reflect.TypeOf(uintptr(0)) 46 | 47 | varType["complex128"] = reflect.TypeOf(complex128(0)) 48 | varType["complex64"] = reflect.TypeOf(complex64(0)) 49 | 50 | varType["map[byte]interface{}"] = reflect.TypeOf(map[byte]interface{}{}) 51 | varType["map[rune]interface{}"] = reflect.TypeOf(map[byte]interface{}{}) 52 | varType["map[int]interface{}"] = reflect.TypeOf(map[int]interface{}{}) 53 | varType["map[int64]interface{}"] = reflect.TypeOf(map[int64]interface{}{}) 54 | varType["map[int32]interface{}"] = reflect.TypeOf(map[int32]interface{}{}) 55 | varType["map[float64]interface{}"] = reflect.TypeOf(map[float64]interface{}{}) 56 | varType["map[float32]interface{}"] = reflect.TypeOf(map[float32]interface{}{}) 57 | 58 | varType["map[int8]interface{}"] = reflect.TypeOf(map[int8]interface{}{}) 59 | varType["map[int16]interface{}"] = reflect.TypeOf(map[int16]interface{}{}) 60 | 61 | varType["map[uint]interface{}"] = reflect.TypeOf(map[uint]interface{}{}) 62 | varType["map[uint8]interface{}"] = reflect.TypeOf(map[uint8]interface{}{}) 63 | varType["map[uint16]interface{}"] = reflect.TypeOf(map[uint16]interface{}{}) 64 | varType["map[uint32]interface{}"] = reflect.TypeOf(map[uint32]interface{}{}) 65 | varType["map[uint64]interface{}"] = reflect.TypeOf(map[uint64]interface{}{}) 66 | varType["map[uintptr]interface{}"] = reflect.TypeOf(map[uintptr]interface{}{}) 67 | 68 | varType["map[complex128]interface{}"] = reflect.TypeOf(map[complex128]interface{}{}) 69 | varType["map[complex64]interface{}"] = reflect.TypeOf(map[complex64]interface{}{}) 70 | 71 | varType["[]string"] = reflect.TypeOf([]string{}) 72 | varType["[]bool"] = reflect.TypeOf([]bool{}) 73 | varType["[]rune"] = reflect.TypeOf([]bool{}) 74 | varType["[]int"] = reflect.TypeOf([]int{}) 75 | varType["[]int64"] = reflect.TypeOf([]int64{}) 76 | varType["[]int32"] = reflect.TypeOf([]int32{}) 77 | varType["[]float64"] = reflect.TypeOf([]float64{}) 78 | varType["[]float32"] = reflect.TypeOf([]float32{}) 79 | 80 | varType["[]int8"] = reflect.TypeOf([]int8{}) 81 | varType["[]int16"] = reflect.TypeOf([]int16{}) 82 | 83 | varType["[]uint"] = reflect.TypeOf([]uint{}) 84 | varType["[]uint8"] = reflect.TypeOf([]uint8{}) 85 | varType["[]uint16"] = reflect.TypeOf([]uint16{}) 86 | varType["[]uint32"] = reflect.TypeOf([]uint32{}) 87 | varType["[]uint64"] = reflect.TypeOf([]uint64{}) 88 | varType["[]uintptr"] = reflect.TypeOf([]uintptr{}) 89 | 90 | varType["[]complex128"] = reflect.TypeOf([]complex128{}) 91 | varType["[]complex64"] = reflect.TypeOf([]complex64{}) 92 | } 93 | 94 | // toString converts multiple types to a string|[]byte 95 | // 96 | // accepts: string, []byte, byte, int (and variants), [][]byte, []interface{} 97 | func ToString[T interface{string | []byte}](val interface{}) T { 98 | switch reflect.TypeOf(val) { 99 | case varType["string"]: 100 | return T(val.(string)) 101 | case varType["[]byte"]: 102 | return T(val.([]byte)) 103 | case varType["byte"]: 104 | return T([]byte{val.(byte)}) 105 | case varType["int"]: 106 | return T(strconv.Itoa(val.(int))) 107 | case varType["int64"]: 108 | return T(strconv.Itoa(int(val.(int64)))) 109 | case varType["int32"]: 110 | return T([]byte{byte(val.(int32))}) 111 | case varType["int16"]: 112 | return T([]byte{byte(val.(int16))}) 113 | case varType["int8"]: 114 | return T([]byte{byte(val.(int8))}) 115 | case varType["uintptr"]: 116 | return T(strconv.FormatUint(uint64(val.(uintptr)), 10)) 117 | case varType["uint"]: 118 | return T(strconv.FormatUint(uint64(val.(uint)), 10)) 119 | case varType["uint64"]: 120 | return T(strconv.FormatUint(val.(uint64), 10)) 121 | case varType["uint32"]: 122 | return T(strconv.FormatUint(uint64(val.(uint32)), 10)) 123 | case varType["uint16"]: 124 | return T(strconv.FormatUint(uint64(val.(uint16)), 10)) 125 | case varType["uint8"]: 126 | return T(strconv.FormatUint(uint64(val.(uint8)), 10)) 127 | case varType["float64"]: 128 | return T(strconv.FormatFloat(val.(float64), 'f', -1, 64)) 129 | case varType["float32"]: 130 | return T(strconv.FormatFloat(float64(val.(float32)), 'f', -1, 32)) 131 | case varType["rune"]: 132 | return T([]byte{byte(val.(rune))}) 133 | case varType["[]interface{}"]: 134 | b := make([]byte, len(val.([]interface{}))) 135 | for i, v := range val.([]interface{}) { 136 | b[i] = byte(ToNumber[int32](v)) 137 | } 138 | return T(b) 139 | case varType["[]int"]: 140 | b := make([]byte, len(val.([]int))) 141 | for i, v := range val.([]int) { 142 | b[i] = byte(v) 143 | } 144 | return T(b) 145 | case varType["[]int64"]: 146 | b := make([]byte, len(val.([]int64))) 147 | for i, v := range val.([]int64) { 148 | b[i] = byte(v) 149 | } 150 | return T(b) 151 | case varType["[]int32"]: 152 | b := make([]byte, len(val.([]int32))) 153 | for i, v := range val.([]int32) { 154 | b[i] = byte(v) 155 | } 156 | return T(b) 157 | case varType["[]int16"]: 158 | b := make([]byte, len(val.([]int16))) 159 | for i, v := range val.([]int16) { 160 | b[i] = byte(v) 161 | } 162 | return T(b) 163 | case varType["[]int8"]: 164 | b := make([]byte, len(val.([]int8))) 165 | for i, v := range val.([]int8) { 166 | b[i] = byte(v) 167 | } 168 | return T(b) 169 | case varType["[]uint"]: 170 | b := make([]byte, len(val.([]uint))) 171 | for i, v := range val.([]uint) { 172 | b[i] = byte(v) 173 | } 174 | return T(b) 175 | case varType["[]uint8"]: 176 | b := make([]byte, len(val.([]uint8))) 177 | for i, v := range val.([]uint8) { 178 | b[i] = byte(v) 179 | } 180 | return T(b) 181 | case varType["[]uint16"]: 182 | b := make([]byte, len(val.([]uint16))) 183 | for i, v := range val.([]uint16) { 184 | b[i] = byte(v) 185 | } 186 | return T(b) 187 | case varType["[]uint32"]: 188 | b := make([]byte, len(val.([]uint32))) 189 | for i, v := range val.([]uint32) { 190 | b[i] = byte(v) 191 | } 192 | return T(b) 193 | case varType["[]uint64"]: 194 | b := make([]byte, len(val.([]uint64))) 195 | for i, v := range val.([]uint64) { 196 | b[i] = byte(v) 197 | } 198 | return T(b) 199 | case varType["[]uintptr"]: 200 | b := make([]byte, len(val.([]uintptr))) 201 | for i, v := range val.([]uintptr) { 202 | b[i] = byte(v) 203 | } 204 | return T(b) 205 | case varType["[]string"]: 206 | b := []byte{} 207 | for _, v := range val.([]string) { 208 | b = append(b, []byte(v)...) 209 | } 210 | return T(b) 211 | case varType["[][]byte"]: 212 | b := []byte{} 213 | for _, v := range val.([][]byte) { 214 | b = append(b, v...) 215 | } 216 | return T(b) 217 | case varType["[]rune"]: 218 | b := []byte{} 219 | for _, v := range val.([]rune) { 220 | b = append(b, byte(v)) 221 | } 222 | return T(b) 223 | default: 224 | return T("") 225 | } 226 | } 227 | 228 | // toNumber converts multiple types to a number 229 | // 230 | // accepts: int (and variants), string, []byte, byte, bool 231 | func ToNumber[T interface{int | int8 | int16 | int32 | int64 | uint | uint8 | uint16 | uint32 | uint64 | uintptr | float64 | float32}](val interface{}) T { 232 | switch reflect.TypeOf(val) { 233 | case varType["int"]: 234 | return T(val.(int)) 235 | case varType["int32"]: 236 | return T(val.(int32)) 237 | case varType["int64"]: 238 | return T(val.(int64)) 239 | case varType["float64"]: 240 | return T(val.(float64)) 241 | case varType["float32"]: 242 | return T(val.(float32)) 243 | case varType["string"]: 244 | var varT interface{} = T(0) 245 | if _, ok := varT.(float64); ok { 246 | if f, err := strconv.ParseFloat(val.(string), 64); err == nil { 247 | return T(f) 248 | } 249 | }else if _, ok := varT.(float32); ok { 250 | if f, err := strconv.ParseFloat(val.(string), 32); err == nil { 251 | return T(f) 252 | } 253 | }else if i, err := strconv.Atoi(val.(string)); err == nil { 254 | return T(i) 255 | } 256 | return 0 257 | case varType["[]byte"]: 258 | if i, err := strconv.Atoi(string(val.([]byte))); err == nil { 259 | return T(i) 260 | } 261 | return 0 262 | case varType["byte"]: 263 | if i, err := strconv.Atoi(string(val.(byte))); err == nil { 264 | return T(i) 265 | } 266 | return 0 267 | case varType["bool"]: 268 | if val.(bool) == true { 269 | return 1 270 | } 271 | return 0 272 | case varType["int8"]: 273 | return T(val.(int8)) 274 | case varType["int16"]: 275 | return T(val.(int16)) 276 | case varType["uint"]: 277 | return T(val.(uint)) 278 | case varType["uint8"]: 279 | return T(val.(uint8)) 280 | case varType["uint16"]: 281 | return T(val.(uint16)) 282 | case varType["uint32"]: 283 | return T(val.(uint32)) 284 | case varType["uint64"]: 285 | return T(val.(uint64)) 286 | case varType["uintptr"]: 287 | return T(val.(uintptr)) 288 | case varType["rune"]: 289 | if i, err := strconv.Atoi(string(val.(rune))); err == nil { 290 | return T(i) 291 | } 292 | return 0 293 | default: 294 | return 0 295 | } 296 | } 297 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/AspieSoft/go-regex/v8 2 | 3 | go 1.18 4 | 5 | require github.com/GRbit/go-pcre v1.0.1 6 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/GRbit/go-pcre v1.0.1 h1:8F7Wj1rxIq8ejKSXVVW2wE+4I4VnZbuOemrMk8kn3hc= 2 | github.com/GRbit/go-pcre v1.0.1/go.mod h1:0g7qVGbMpd2Odevd92x1RpaLpR3c3F/Gv2HEnI7CwEA= 3 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Go Regex 2 | 3 | [![donation link](https://img.shields.io/badge/buy%20me%20a%20coffee-paypal-blue)](https://paypal.me/shaynejrtaylor?country.x=US&locale.x=en_US) 4 | 5 | A High Performance PCRE Regex Package That Uses A Cache. 6 | 7 | Simplifies the the go-pcre regex package. 8 | After calling a regex, the compiled output gets cached to improve performance. 9 | 10 | This package uses the [go-pcre](https://github.com/GRbit/go-pcre) package for better performance. 11 | 12 | If you need better compatability, checkout my other RE2 based module [go-regex-re2](https://github.com/AspieSoft/go-regex-re2). 13 | 14 | ## Installation 15 | 16 | ```shell script 17 | go get github.com/AspieSoft/go-regex/v8 18 | ``` 19 | 20 | ## Dependencies 21 | 22 | ### Debian/Ubuntu (Linux) 23 | 24 | ```shell script 25 | sudo apt install libpcre3-dev 26 | ``` 27 | 28 | ### Fedora (Linux) 29 | 30 | ```shell script 31 | sudo dnf install pcre-devel 32 | ``` 33 | 34 | ### Arch (Linux) 35 | 36 | ```shell script 37 | sudo yum install pcre-dev 38 | ``` 39 | 40 | ## Usage 41 | 42 | ```go 43 | 44 | import ( 45 | "github.com/AspieSoft/go-regex/v8" 46 | 47 | // or for verbose function names 48 | "github.com/AspieSoft/go-regex/v8/verbose" 49 | ) 50 | 51 | // this example will use verbose mode to make function names more clear 52 | 53 | // pre compile a regex into the cache 54 | // this method also returns the compiled pcre.Regexp struct 55 | regex.Compile(`re`) 56 | 57 | // compile a regex and safely escape user input 58 | regex.Compile(`re %1`, `this will be escaped .*`); // output: this will be escaped \.\* 59 | regex.Compile(`re %1`, `hello \n world`); // output: hello \\n world (note: the \ was escaped, and the n is literal) 60 | tree/v4.0.0 61 | // use %n to reference a param 62 | // use %{n} for param indexes with more than 1 digit 63 | regex.Compile(`re %1 and %2 ... %{12}`, `param 1`, `param 2` ..., `param 12`); 64 | 65 | // manually escape a string 66 | // note: the compile methods params are automatically escaped 67 | regex.Escape(`(.*)? \$ \\$ \\\$ regex hack failed`) 68 | 69 | // determine if a regex is valid, and can be compiled by this module 70 | regex.IsValid(`re`) 71 | 72 | // determine if a regex is valid, and can be compiled by the PCRE module 73 | regex.IsValidPCRE(`re`) 74 | 75 | // determine if a regex is valid, and can be compiled by the builtin RE2 module 76 | regex.IsValidRE2(`re`) 77 | 78 | // run a replace function (most advanced feature) 79 | regex.Compile(`(?flags)re(capture group)`).ReplaceFunc(myByteArray, func(data func(int) []byte) []byte { 80 | data(0) // get the string 81 | data(1) // get the first capture group 82 | 83 | return []byte("") 84 | 85 | // if the last option is true, returning nil will stop the loop early 86 | return nil 87 | }, true /* optional: if true, will not process a return output */) 88 | 89 | // run a replace function 90 | regex.Compile(`re (capture)`).ReplaceString(myByteArray, []byte("test $1")) 91 | 92 | // run a simple light replace function 93 | regex.Compile(`re`).ReplaceStringLiteral(myByteArray, []byte("all capture groups ignored (ie: $1)")) 94 | 95 | 96 | // return a bool if a regex matches a byte array 97 | regex.Compile(`re`).Match(myByteArray) 98 | 99 | // split a byte array in a similar way to JavaScript 100 | regex.Compile(`re|(keep this and split like in JavaScript)`).Split(myByteArray) 101 | 102 | // a regex string is modified before compiling, to add a few other features 103 | `use \' in place of ` + "`" + ` to make things easier` 104 | `(?#This is a comment in regex)` 105 | 106 | // an alias of pcre.Regexp 107 | regex.PCRE 108 | 109 | // an alias of *regexp.Regexp 110 | regex.RE2 111 | 112 | // direct access to compiled pcre.Regexp 113 | regex.Compile("re").RE 114 | 115 | 116 | // another helpful function 117 | // this method makes it easier to return results to a regex function 118 | regex.JoinBytes("string", []byte("byte array"), 10, 'c', data(2)) 119 | 120 | // the above method can be used in place of this one 121 | append(append(append(append([]byte("string"), []byte("byte array")...), []byte(strconv.Itoa(10))...), 'c'), data(2)...) 122 | 123 | ``` 124 | -------------------------------------------------------------------------------- /regex.go: -------------------------------------------------------------------------------- 1 | package regex 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "os" 7 | "regexp" 8 | "sort" 9 | "strconv" 10 | "time" 11 | 12 | "github.com/AspieSoft/go-regex/v8/common" 13 | "github.com/GRbit/go-pcre" 14 | ) 15 | 16 | type PCRE pcre.Regexp 17 | type RE2 *regexp.Regexp 18 | 19 | type Regexp struct { 20 | RE pcre.Regexp 21 | len int64 22 | } 23 | 24 | type bgPart struct { 25 | ref []byte 26 | b []byte 27 | } 28 | 29 | var regCompCommentAndChars *regexp.Regexp = regexp.MustCompile(`(\\|)\(\?#.*?\)|%!|!%|\\[\\']`) 30 | var regCompParam *regexp.Regexp = regexp.MustCompile(`(\\|)%(\{[0-9]+\}|[0-9])`) 31 | var regCompBG *regexp.Regexp = regexp.MustCompile(`\[^?(\\[\\\]]|[^\]])+\]`) 32 | var regCompBGRefChar *regexp.Regexp = regexp.MustCompile(`%!|!%`) 33 | var regCompBGRef *regexp.Regexp = regexp.MustCompile(`%!([0-9]+|o|c)!%`) 34 | 35 | var regComplexSel *Regexp 36 | var regEscape *Regexp 37 | 38 | var cache common.CacheMap[*Regexp] = common.NewCache[*Regexp]() 39 | var compCache common.CacheMap[[]byte] = common.NewCache[[]byte]() 40 | 41 | func init() { 42 | regComplexSel = Comp(`(\\|)\$([0-9]|\{[0-9]+\})`) 43 | regEscape = Comp(`[\\\^\$\.\|\?\*\+\(\)\[\]\{\}\%]`) 44 | 45 | go func(){ 46 | for { 47 | time.Sleep(10 * time.Minute) 48 | 49 | // default: remove cache items have not been accessed in over 2 hours 50 | cacheTime := 2 * time.Hour 51 | 52 | // SysFreeMemory returns the total free system memory in megabytes 53 | mb := common.SysFreeMemory() 54 | if mb < 200 && mb != 0 { 55 | // low memory: remove cache items have not been accessed in over 10 minutes 56 | cacheTime = 10 * time.Minute 57 | }else if mb < 500 && mb != 0 { 58 | // low memory: remove cache items have not been accessed in over 30 minutes 59 | cacheTime = 30 * time.Minute 60 | }else if mb < 2000 && mb != 0 { 61 | // low memory: remove cache items have not been accessed in over 1 hour 62 | cacheTime = 1 * time.Hour 63 | }else if mb > 64000 { 64 | // high memory: remove cache items have not been accessed in over 12 hour 65 | cacheTime = 12 * time.Hour 66 | }else if mb > 32000 { 67 | // high memory: remove cache items have not been accessed in over 6 hour 68 | cacheTime = 6 * time.Hour 69 | }else if mb > 16000 { 70 | // high memory: remove cache items have not been accessed in over 3 hour 71 | cacheTime = 3 * time.Hour 72 | } 73 | 74 | cache.DelOld(cacheTime) 75 | compCache.DelOld(cacheTime) 76 | 77 | time.Sleep(10 * time.Second) 78 | 79 | // clear cache if were still critically low on available memory 80 | if mb := common.SysFreeMemory(); mb < 10 && mb != 0 { 81 | cache.DelOld(0) 82 | compCache.DelOld(0) 83 | } 84 | } 85 | }() 86 | } 87 | 88 | // this method compiles the RE string to add more functionality to it 89 | func compRE(re string, params []string) string { 90 | if val, err := compCache.Get(re); val != nil || err != nil { 91 | if err != nil { 92 | return "" 93 | } 94 | 95 | return string(regCompParam.ReplaceAllFunc(val, func(b []byte) []byte { 96 | if b[1] == '{' && b[len(b)-1] == '}' { 97 | b = b[2:len(b)-1] 98 | }else{ 99 | b = b[1:] 100 | } 101 | 102 | if n, e := strconv.Atoi(string(b)); e == nil && n > 0 && n <= len(params) { 103 | return []byte(Escape(params[n-1])) 104 | } 105 | return []byte{} 106 | })) 107 | } 108 | 109 | reB := []byte(re) 110 | 111 | reB = regCompCommentAndChars.ReplaceAllFunc(reB, func(b []byte) []byte { 112 | if bytes.Equal(b, []byte("%!")) { 113 | return []byte("%!o!%") 114 | }else if bytes.Equal(b, []byte("!%")) { 115 | return []byte("%!c!%") 116 | }else if b[0] == '\\' { 117 | if b[1] == '\'' { 118 | return []byte{'`'} 119 | } 120 | return b 121 | } 122 | return []byte{} 123 | }) 124 | 125 | bgList := [][]byte{} 126 | reB = regCompBG.ReplaceAllFunc(reB, func(b []byte) []byte { 127 | bgList = append(bgList, b) 128 | return common.JoinBytes('%', '!', len(bgList)-1, '!', '%') 129 | }) 130 | 131 | for ind, bgItem := range bgList { 132 | charS := []byte{'['} 133 | if bgItem[1] == '^' { 134 | bgItem = bgItem[2:len(bgItem)-1] 135 | charS = append(charS, '^') 136 | }else{ 137 | bgItem = bgItem[1:len(bgItem)-1] 138 | } 139 | 140 | newBG := []bgPart{} 141 | for i := 0; i < len(bgItem); i++ { 142 | if i+1 < len(bgItem) { 143 | if bgItem[i] == '\\' { 144 | newBG = append(newBG, bgPart{ref: []byte{bgItem[i+1]}, b: []byte{bgItem[i], bgItem[i+1]}}) 145 | i++ 146 | continue 147 | }else if bgItem[i+1] == '-' && i+2 < len(bgItem) { 148 | newBG = append(newBG, bgPart{ref: []byte{bgItem[i], bgItem[i+2]}, b: []byte{bgItem[i], bgItem[i+1], bgItem[i+2]}}) 149 | i += 2 150 | continue 151 | } 152 | } 153 | newBG = append(newBG, bgPart{ref: []byte{bgItem[i]}, b: []byte{bgItem[i]}}) 154 | } 155 | 156 | sort.Slice(newBG, func(i, j int) bool { 157 | if len(newBG[i].ref) > len(newBG[j].ref) { 158 | return true 159 | }else if len(newBG[i].ref) < len(newBG[j].ref) { 160 | return false 161 | } 162 | 163 | for k := 0; k < len(newBG[i].ref); k++ { 164 | if newBG[i].ref[k] < newBG[j].ref[k] { 165 | return true 166 | }else if newBG[i].ref[k] > newBG[j].ref[k] { 167 | return false 168 | } 169 | } 170 | 171 | return false 172 | }) 173 | 174 | bgItem = charS 175 | for i := 0; i < len(newBG); i++ { 176 | bgItem = append(bgItem, newBG[i].b...) 177 | } 178 | bgItem = append(bgItem, ']') 179 | 180 | bgList[ind] = bgItem 181 | } 182 | 183 | reB = regCompBGRef.ReplaceAllFunc(reB, func(b []byte) []byte { 184 | b = b[2:len(b)-2] 185 | 186 | if b[0] == 'o' { 187 | return []byte(`%!`) 188 | }else if b[0] == 'c' { 189 | return []byte(`!%`) 190 | } 191 | 192 | if n, e := strconv.Atoi(string(b)); e == nil && n < len(bgList) { 193 | return bgList[n] 194 | } 195 | return []byte{} 196 | }) 197 | 198 | compCache.Set(re, reB, nil) 199 | 200 | return string(regCompParam.ReplaceAllFunc(reB, func(b []byte) []byte { 201 | if b[1] == '{' && b[len(b)-1] == '}' { 202 | b = b[2:len(b)-1] 203 | }else{ 204 | b = b[1:] 205 | } 206 | 207 | if n, e := strconv.Atoi(string(b)); e == nil && n > 0 && n <= len(params) { 208 | return []byte(Escape(params[n-1])) 209 | } 210 | return []byte{} 211 | })) 212 | } 213 | 214 | 215 | //* regex compile methods 216 | 217 | // Comp compiles a regular expression and store it in the cache 218 | func Comp(re string, params ...string) *Regexp { 219 | re = compRE(re, params) 220 | 221 | if val, err := cache.Get(re); val != nil || err != nil { 222 | if err != nil { 223 | panic(err) 224 | } 225 | 226 | return val 227 | } 228 | 229 | reg := pcre.MustCompile(re, pcre.UTF8) 230 | 231 | // commented below methods compiled 10000 times in 0.1s (above method being used finished in half of that time) 232 | // reg := pcre.MustCompileParse(re) 233 | // reg := pcre.MustCompileJIT(re, pcre.UTF8, pcre.STUDY_JIT_COMPILE) 234 | // reg := pcre.MustCompileJIT(re, pcre.EXTRA, pcre.STUDY_JIT_COMPILE) 235 | // reg := pcre.MustCompileJIT(re, pcre.JAVASCRIPT_COMPAT, pcre.STUDY_JIT_COMPILE) 236 | // reg := pcre.MustCompileParseJIT(re, pcre.STUDY_JIT_COMPILE) 237 | 238 | compRe := Regexp{RE: reg, len: int64(len(re))} 239 | 240 | cache.Set(re, &compRe, nil) 241 | return &compRe 242 | } 243 | 244 | // CompTry tries to compile or returns an error 245 | func CompTry(re string, params ...string) (*Regexp, error) { 246 | re = compRE(re, params) 247 | 248 | if val, err := cache.Get(re); val != nil || err != nil { 249 | if err != nil { 250 | return &Regexp{}, err 251 | } 252 | 253 | return val, nil 254 | } 255 | 256 | reg, err := pcre.Compile(re, pcre.UTF8) 257 | if err != nil { 258 | cache.Set(re, nil, err) 259 | return &Regexp{}, err 260 | } 261 | 262 | // commented below methods compiled 10000 times in 0.1s (above method being used finished in half of that time) 263 | // reg := pcre.MustCompileParse(re) 264 | // reg := pcre.MustCompileJIT(re, pcre.UTF8, pcre.STUDY_JIT_COMPILE) 265 | // reg := pcre.MustCompileJIT(re, pcre.EXTRA, pcre.STUDY_JIT_COMPILE) 266 | // reg := pcre.MustCompileJIT(re, pcre.JAVASCRIPT_COMPAT, pcre.STUDY_JIT_COMPILE) 267 | // reg := pcre.MustCompileParseJIT(re, pcre.STUDY_JIT_COMPILE) 268 | 269 | compRe := Regexp{RE: reg, len: int64(len(re))} 270 | 271 | cache.Set(re, &compRe, nil) 272 | return &compRe, nil 273 | } 274 | 275 | 276 | //* regex methods 277 | 278 | // RepFunc replaces a string with the result of a function 279 | // 280 | // similar to JavaScript .replace(/re/, function(data){}) 281 | func (reg *Regexp) RepFunc(str []byte, rep func(data func(int) []byte) []byte, blank ...bool) []byte { 282 | ind := reg.RE.FindAllIndex(str, 0) 283 | 284 | res := []byte{} 285 | trim := 0 286 | for _, pos := range ind { 287 | v := str[pos[0]:pos[1]] 288 | m := reg.RE.NewMatcher(v, 0) 289 | 290 | if len(blank) != 0 { 291 | gCache := map[int][]byte{} 292 | r := rep(func(g int) []byte { 293 | if v, ok := gCache[g]; ok { 294 | return v 295 | } 296 | v := m.Group(g) 297 | gCache[g] = v 298 | return v 299 | }) 300 | 301 | if []byte(r) == nil { 302 | return []byte{} 303 | } 304 | } else { 305 | if trim == 0 { 306 | res = append(res, str[:pos[0]]...) 307 | } else { 308 | res = append(res, str[trim:pos[0]]...) 309 | } 310 | trim = pos[1] 311 | 312 | gCache := map[int][]byte{} 313 | r := rep(func(g int) []byte { 314 | if v, ok := gCache[g]; ok { 315 | return v 316 | } 317 | v := m.Group(g) 318 | gCache[g] = v 319 | return v 320 | }) 321 | 322 | if []byte(r) == nil { 323 | res = append(res, str[trim:]...) 324 | return res 325 | } 326 | 327 | res = append(res, r...) 328 | } 329 | } 330 | 331 | if len(blank) != 0 { 332 | return []byte{} 333 | } 334 | 335 | res = append(res, str[trim:]...) 336 | 337 | return res 338 | } 339 | 340 | // RepStrLit replaces a string with another string 341 | // 342 | // note: this function is optimized for performance, and the replacement string does not accept replacements like $1 343 | func (reg *Regexp) RepStrLit(str []byte, rep []byte) []byte { 344 | return reg.RE.ReplaceAll(str, rep, 0) 345 | } 346 | 347 | // RepStr is a more complex version of the RepStrLit method 348 | // 349 | // this function will replace things in the result like $1 with your capture groups 350 | // 351 | // use $0 to use the full regex capture group 352 | // 353 | // use ${123} to use numbers with more than one digit 354 | func (reg *Regexp) RepStr(str []byte, rep []byte) []byte { 355 | ind := reg.RE.FindAllIndex(str, 0) 356 | 357 | res := []byte{} 358 | trim := 0 359 | for _, pos := range ind { 360 | v := str[pos[0]:pos[1]] 361 | m := reg.RE.NewMatcher(v, 0) 362 | 363 | if trim == 0 { 364 | res = append(res, str[:pos[0]]...) 365 | } else { 366 | res = append(res, str[trim:pos[0]]...) 367 | } 368 | trim = pos[1] 369 | 370 | r := regComplexSel.RepFunc(rep, func(data func(int) []byte) []byte { 371 | if len(data(1)) != 0 { 372 | return data(0) 373 | } 374 | n := data(2) 375 | if len(n) > 1 { 376 | n = n[1:len(n)-1] 377 | } 378 | if i, err := strconv.Atoi(string(n)); err == nil { 379 | return m.Group(i) 380 | } 381 | return []byte{} 382 | }) 383 | 384 | if r == nil { 385 | res = append(res, str[trim:]...) 386 | return res 387 | } 388 | 389 | res = append(res, r...) 390 | } 391 | 392 | res = append(res, str[trim:]...) 393 | 394 | return res 395 | } 396 | 397 | // Match returns true if a []byte matches a regex 398 | func (reg *Regexp) Match(str []byte) bool { 399 | return reg.RE.MatchWFlags(str, 0) 400 | } 401 | 402 | // Split splits a string, and keeps capture groups 403 | // 404 | // Similar to JavaScript .split(/re/) 405 | func (reg *Regexp) Split(str []byte) [][]byte { 406 | ind := reg.RE.FindAllIndex(str, 0) 407 | 408 | res := [][]byte{} 409 | trim := 0 410 | for _, pos := range ind { 411 | v := str[pos[0]:pos[1]] 412 | m := reg.RE.NewMatcher(v, 0) 413 | 414 | if trim == 0 { 415 | res = append(res, str[:pos[0]]) 416 | } else { 417 | res = append(res, str[trim:pos[0]]) 418 | } 419 | trim = pos[1] 420 | 421 | for i := 1; i <= m.Groups; i++ { 422 | g := m.Group(i) 423 | if len(g) != 0 { 424 | res = append(res, m.Group(i)) 425 | } 426 | } 427 | } 428 | 429 | e := str[trim:] 430 | if len(e) != 0 { 431 | res = append(res, str[trim:]) 432 | } 433 | 434 | return res 435 | } 436 | 437 | 438 | //* other regex methods 439 | 440 | // Escape will escape regex special chars 441 | func Escape(re string) string { 442 | return string(regEscape.RepStr([]byte(re), []byte(`\$1`))) 443 | } 444 | 445 | // IsValid will return true if a regex is valid and can be compiled by this module 446 | func IsValid(re string) bool { 447 | re = compRE(re, []string{}) 448 | if _, err := pcre.Compile(re, pcre.UTF8); err == nil { 449 | return true 450 | } 451 | return false 452 | } 453 | 454 | // IsValidPCRE will return true if a regex is valid and can be compiled by the PCRE module 455 | func IsValidPCRE(re string) bool { 456 | if _, err := pcre.Compile(re, pcre.UTF8); err == nil { 457 | return true 458 | } 459 | return false 460 | } 461 | 462 | // IsValidRE2 will return true if a regex is valid and can be compiled by the builtin RE2 module 463 | func IsValidRE2(re string) bool { 464 | if _, err := regexp.Compile(re); err == nil { 465 | return true 466 | } 467 | return false 468 | } 469 | 470 | 471 | // JoinBytes is an easy way to join multiple values into a single []byte 472 | func JoinBytes(bytes ...interface{}) []byte { 473 | return common.JoinBytes(bytes...) 474 | } 475 | 476 | 477 | // RepFileStr replaces a regex match with a new []byte in a file 478 | // 479 | // @all: if true, will replace all text matching @re, 480 | // if false, will only replace the first occurrence 481 | func (reg *Regexp) RepFileStr(name string, rep []byte, all bool, maxReSize ...int64) error { 482 | stat, err := os.Stat(name) 483 | if err != nil || stat.IsDir() { 484 | return err 485 | } 486 | 487 | file, err := os.OpenFile(name, os.O_RDWR, stat.Mode().Perm()) 488 | if err != nil { 489 | return err 490 | } 491 | defer file.Close() 492 | 493 | var found bool 494 | 495 | l := int64(reg.len * 10) 496 | if l < 1024 { 497 | l = 1024 498 | } 499 | for _, maxRe := range maxReSize { 500 | if l < maxRe { 501 | l = maxRe 502 | } 503 | } 504 | 505 | i := int64(0) 506 | 507 | buf := make([]byte, l) 508 | size, err := file.ReadAt(buf, i) 509 | buf = buf[:size] 510 | for err == nil { 511 | if reg.Match(buf) { 512 | found = true 513 | 514 | repRes := reg.RepStr(buf, rep) 515 | 516 | rl := int64(len(repRes)) 517 | if rl == l { 518 | file.WriteAt(repRes, i) 519 | file.Sync() 520 | }else if rl < l { 521 | file.WriteAt(repRes, i) 522 | rl = l - rl 523 | 524 | j := i+l 525 | 526 | b := make([]byte, 1024) 527 | s, e := file.ReadAt(b, j) 528 | b = b[:s] 529 | 530 | for e == nil { 531 | file.WriteAt(b, j-rl) 532 | j += 1024 533 | b = make([]byte, 1024) 534 | s, e = file.ReadAt(b, j) 535 | b = b[:s] 536 | } 537 | 538 | if s != 0 { 539 | file.WriteAt(b, j-rl) 540 | j += int64(s) 541 | } 542 | 543 | file.Truncate(j-rl) 544 | file.Sync() 545 | }else if rl > l { 546 | rl -= l 547 | 548 | dif := int64(1024) 549 | if rl > dif { 550 | dif = rl 551 | } 552 | 553 | j := i+l 554 | 555 | b := make([]byte, dif) 556 | s, e := file.ReadAt(b, j) 557 | bw := b[:s] 558 | 559 | file.WriteAt(repRes, i) 560 | j += rl 561 | 562 | for e == nil { 563 | b = make([]byte, dif) 564 | s, e = file.ReadAt(b, j+dif-rl) 565 | 566 | file.WriteAt(bw, j) 567 | bw = b[:s] 568 | 569 | j += dif 570 | } 571 | 572 | file.WriteAt(bw, j) 573 | file.Sync() 574 | } 575 | 576 | if !all { 577 | file.Sync() 578 | file.Close() 579 | return nil 580 | } 581 | 582 | i += int64(len(repRes)) 583 | } 584 | 585 | i++ 586 | buf = make([]byte, l) 587 | size, err = file.ReadAt(buf, i) 588 | buf = buf[:size] 589 | } 590 | 591 | if reg.Match(buf) { 592 | found = true 593 | 594 | repRes := reg.RepStr(buf, rep) 595 | 596 | rl := int64(len(repRes)) 597 | if rl == l { 598 | file.WriteAt(repRes, i) 599 | file.Sync() 600 | }else if rl < l { 601 | file.WriteAt(repRes, i) 602 | rl = l - rl 603 | 604 | j := i+l 605 | 606 | b := make([]byte, 1024) 607 | s, e := file.ReadAt(b, j) 608 | b = b[:s] 609 | 610 | for e == nil { 611 | file.WriteAt(b, j-rl) 612 | j += 1024 613 | b = make([]byte, 1024) 614 | s, e = file.ReadAt(b, j) 615 | b = b[:s] 616 | } 617 | 618 | if s != 0 { 619 | file.WriteAt(b, j-rl) 620 | j += int64(s) 621 | } 622 | 623 | file.Truncate(j-rl) 624 | file.Sync() 625 | }else if rl > l { 626 | rl -= l 627 | 628 | dif := int64(1024) 629 | if rl > dif { 630 | dif = rl 631 | } 632 | 633 | j := i+l 634 | 635 | b := make([]byte, dif) 636 | s, e := file.ReadAt(b, j) 637 | bw := b[:s] 638 | 639 | file.WriteAt(repRes, i) 640 | j += rl 641 | 642 | for e == nil { 643 | b = make([]byte, dif) 644 | s, e = file.ReadAt(b, j+dif-rl) 645 | 646 | file.WriteAt(bw, j) 647 | bw = b[:s] 648 | 649 | j += dif 650 | } 651 | 652 | file.WriteAt(bw, j) 653 | file.Sync() 654 | } 655 | } 656 | 657 | file.Sync() 658 | file.Close() 659 | 660 | if !found { 661 | return io.EOF 662 | } 663 | return nil 664 | } 665 | 666 | // RepFileFunc replaces a regex match with the result of a callback function in a file 667 | // 668 | // @all: if true, will replace all text matching @re, 669 | // if false, will only replace the first occurrence 670 | func (reg *Regexp) RepFileFunc(name string, rep func(data func(int) []byte) []byte, all bool, maxReSize ...int64) error { 671 | stat, err := os.Stat(name) 672 | if err != nil || stat.IsDir() { 673 | return err 674 | } 675 | 676 | file, err := os.OpenFile(name, os.O_RDWR, stat.Mode().Perm()) 677 | if err != nil { 678 | return err 679 | } 680 | defer file.Close() 681 | 682 | var found bool 683 | 684 | l := int64(reg.len * 10) 685 | if l < 1024 { 686 | l = 1024 687 | } 688 | for _, maxRe := range maxReSize { 689 | if l < maxRe { 690 | l = maxRe 691 | } 692 | } 693 | 694 | i := int64(0) 695 | 696 | buf := make([]byte, l) 697 | size, err := file.ReadAt(buf, i) 698 | buf = buf[:size] 699 | for err == nil { 700 | if reg.Match(buf) { 701 | found = true 702 | 703 | repRes := reg.RepFunc(buf, rep) 704 | 705 | rl := int64(len(repRes)) 706 | if rl == l { 707 | file.WriteAt(repRes, i) 708 | file.Sync() 709 | }else if rl < l { 710 | file.WriteAt(repRes, i) 711 | rl = l - rl 712 | 713 | j := i+l 714 | 715 | b := make([]byte, 1024) 716 | s, e := file.ReadAt(b, j) 717 | b = b[:s] 718 | 719 | for e == nil { 720 | file.WriteAt(b, j-rl) 721 | j += 1024 722 | b = make([]byte, 1024) 723 | s, e = file.ReadAt(b, j) 724 | b = b[:s] 725 | } 726 | 727 | if s != 0 { 728 | file.WriteAt(b, j-rl) 729 | j += int64(s) 730 | } 731 | 732 | file.Truncate(j-rl) 733 | file.Sync() 734 | }else if rl > l { 735 | rl -= l 736 | 737 | dif := int64(1024) 738 | if rl > dif { 739 | dif = rl 740 | } 741 | 742 | j := i+l 743 | 744 | b := make([]byte, dif) 745 | s, e := file.ReadAt(b, j) 746 | bw := b[:s] 747 | 748 | file.WriteAt(repRes, i) 749 | j += rl 750 | 751 | for e == nil { 752 | b = make([]byte, dif) 753 | s, e = file.ReadAt(b, j+dif-rl) 754 | 755 | file.WriteAt(bw, j) 756 | bw = b[:s] 757 | 758 | j += dif 759 | } 760 | 761 | file.WriteAt(bw, j) 762 | file.Sync() 763 | } 764 | 765 | if !all { 766 | file.Sync() 767 | file.Close() 768 | return nil 769 | } 770 | 771 | i += int64(len(repRes)) 772 | } 773 | 774 | i++ 775 | buf = make([]byte, l) 776 | size, err = file.ReadAt(buf, i) 777 | buf = buf[:size] 778 | } 779 | 780 | if reg.Match(buf) { 781 | found = true 782 | 783 | repRes := reg.RepFunc(buf, rep) 784 | 785 | rl := int64(len(repRes)) 786 | if rl == l { 787 | file.WriteAt(repRes, i) 788 | file.Sync() 789 | }else if rl < l { 790 | file.WriteAt(repRes, i) 791 | rl = l - rl 792 | 793 | j := i+l 794 | 795 | b := make([]byte, 1024) 796 | s, e := file.ReadAt(b, j) 797 | b = b[:s] 798 | 799 | for e == nil { 800 | file.WriteAt(b, j-rl) 801 | j += 1024 802 | b = make([]byte, 1024) 803 | s, e = file.ReadAt(b, j) 804 | b = b[:s] 805 | } 806 | 807 | if s != 0 { 808 | file.WriteAt(b, j-rl) 809 | j += int64(s) 810 | } 811 | 812 | file.Truncate(j-rl) 813 | file.Sync() 814 | }else if rl > l { 815 | rl -= l 816 | 817 | dif := int64(1024) 818 | if rl > dif { 819 | dif = rl 820 | } 821 | 822 | j := i+l 823 | 824 | b := make([]byte, dif) 825 | s, e := file.ReadAt(b, j) 826 | bw := b[:s] 827 | 828 | file.WriteAt(repRes, i) 829 | j += rl 830 | 831 | for e == nil { 832 | b = make([]byte, dif) 833 | s, e = file.ReadAt(b, j+dif-rl) 834 | 835 | file.WriteAt(bw, j) 836 | bw = b[:s] 837 | 838 | j += dif 839 | } 840 | 841 | file.WriteAt(bw, j) 842 | file.Sync() 843 | } 844 | } 845 | 846 | file.Sync() 847 | file.Close() 848 | 849 | if !found { 850 | return io.EOF 851 | } 852 | return nil 853 | } 854 | -------------------------------------------------------------------------------- /regex_test.go: -------------------------------------------------------------------------------- 1 | package regex 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "math/rand" 7 | "strconv" 8 | "testing" 9 | "time" 10 | ) 11 | 12 | func TestCompile(t *testing.T) { 13 | reC := Comp("this is test %1", "a") 14 | if reC.RE.ReplaceAllString(`this is test a`, `this is test b`, 0) != `this is test b` { 15 | t.Error(`[this is test %1] [a]`, "\n", errors.New("failed to compile params")) 16 | } 17 | 18 | re := `test .*` 19 | reEscaped := Escape(re) 20 | if re == reEscaped || Comp(reEscaped).Match([]byte(`test 1`)) { 21 | t.Error("[", reEscaped, "]\n", errors.New("escape function failed")) 22 | } 23 | 24 | r := Comp(`test %1`, "%2", "a") 25 | if r.Match([]byte(`test a`)) { 26 | t.Error(`[test %1] [%2, a]`, "\n", errors.New("escape function failed to escape '%' char")) 27 | } 28 | } 29 | 30 | func TestReplaceStr(t *testing.T) { 31 | var check = func(s string, re, r string, e string) { 32 | res := Comp(re).RepStrLit([]byte(s), []byte(r)) 33 | if !bytes.Equal(res, []byte(e)) { 34 | t.Error("[", string(res), "]\n", errors.New("result does not match expected result")) 35 | } 36 | } 37 | 38 | check("this is a test", `(?#a\s+)test`, "", "this is a ") 39 | check("string with `block` quotes", `\'.*?\'`, "'single'", "string with 'single' quotes") 40 | } 41 | 42 | func TestReplaceStrComplex(t *testing.T) { 43 | var check = func(s string, re, r string, e string) { 44 | res := Comp(re).RepStr([]byte(s), []byte(r)) 45 | if !bytes.Equal(res, []byte(e)) { 46 | t.Error("[", string(res), "]\n", errors.New("result does not match expected result")) 47 | } 48 | } 49 | 50 | check("this is a Test", `(?i)a (test)`, "some $1", "this is some Test") 51 | check("I Need Coffee!!!", `Coffee(!*)`, "More Coffee$1", "I Need More Coffee!!!") 52 | } 53 | 54 | func TestReplaceFunc(t *testing.T) { 55 | var check = func(s string, re, r string, e string) { 56 | res := Comp(re).RepFunc([]byte(s), func(data func(int) []byte) []byte { 57 | return JoinBytes(data(1), ' ', r) 58 | }) 59 | if !bytes.Equal(res, []byte(e)) { 60 | t.Error("[", string(res), "]\n", errors.New("result does not match expected result")) 61 | } 62 | } 63 | 64 | check("this is a new test", `(new) test`, "pizza", "this is a new pizza") 65 | check("a random string", `(a) random`, "not so random", "a not so random string") 66 | } 67 | 68 | func TestConcurrent(t *testing.T) { 69 | for i := 0; i < 10; i++ { 70 | for j := 0; j < 10; j++ { 71 | go (func() { 72 | res := Comp(`(t)`).RepFunc([]byte("test"), func(data func(int) []byte) []byte { 73 | return data(1) 74 | }) 75 | _ = res 76 | time.Sleep(10 * time.Nanosecond) 77 | })() 78 | } 79 | 80 | // time.Sleep(1000000 * 1000) // 1 second 81 | time.Sleep(1000000 * 100) // 0.1 second 82 | } 83 | } 84 | 85 | func TestCache(t *testing.T) { 86 | var check = func(s string, re, r string, e string) { 87 | res := Comp(re).RepStrLit([]byte(s), []byte(r)) 88 | if !bytes.Equal(res, []byte(e)) { 89 | t.Error("[", string(res), "]\n", errors.New("result does not match expected result")) 90 | } 91 | } 92 | 93 | check("this is a test", `\sis\s`, " was ", "this was a test") 94 | check("this is a test", `\sis\s`, " was ", "this was a test") 95 | } 96 | 97 | func TestFlags(t *testing.T) { 98 | var check = func(s string, re, r string, e string) { 99 | res := Comp(re).RepStrLit([]byte(s), []byte(r)) 100 | if !bytes.Equal(res, []byte(e)) { 101 | t.Error("[", string(res), "]\n", errors.New("result does not match expected result")) 102 | } 103 | } 104 | 105 | check("this is a\nmultiline text", `(?s)a\s*multiline`, "", "this is text") 106 | check("list line 1\nlist line 2\n list line 3", `(?m)^list`, "a list", "a list line 1\na list line 2\n list line 3") 107 | check("a MultiCase text", `(?i)multicase`, "", "a text") 108 | check("a MultiCase text no flag", `multicase`, "", "a MultiCase text no flag") 109 | 110 | // check("a multi\nline text", `multi\s*line`, "", "a multi\nline text") 111 | } 112 | 113 | func TestPerformance(t *testing.T) { 114 | for i := 0; i < 10000; i++ { 115 | Comp(strconv.Itoa(rand.Int())) 116 | } 117 | } 118 | 119 | func TestValid(t *testing.T) { 120 | var check = func(re string, e bool) { 121 | res := IsValid(re) 122 | if res != e { 123 | t.Error("[", string(re), "]\n", errors.New("result does not match expected result")) 124 | } 125 | } 126 | 127 | check(`[\w_\-]+`, true) 128 | check(`[\w_-.]+`, false) 129 | check(`(?)`, true) 130 | check(`(?i)test`, true) 131 | } 132 | -------------------------------------------------------------------------------- /verbose/verbose.go: -------------------------------------------------------------------------------- 1 | package regex 2 | 3 | import ( 4 | "io" 5 | "os" 6 | "regexp" 7 | 8 | "github.com/AspieSoft/go-regex/v8" 9 | "github.com/AspieSoft/go-regex/v8/common" 10 | "github.com/GRbit/go-pcre" 11 | ) 12 | 13 | type PCRE pcre.Regexp 14 | type RE2 *regexp.Regexp 15 | 16 | type Regexp struct { 17 | RE pcre.Regexp 18 | reg *regex.Regexp 19 | len int64 20 | } 21 | 22 | 23 | //* regex compile methods 24 | 25 | // Comp compiles a regular expression and store it in the cache 26 | func Compile(re string, params ...string) *Regexp { 27 | reg := regex.Comp(re, params...) 28 | return &Regexp{RE: reg.RE, reg: reg, len: int64(len(re))} 29 | } 30 | 31 | // CompTry tries to compile or returns an error 32 | func CompileTry(re string, params ...string) (*Regexp, error) { 33 | reg, err := regex.CompTry(re, params...) 34 | if err != nil { 35 | return &Regexp{}, err 36 | } 37 | return &Regexp{RE: reg.RE, len: int64(len(re))}, nil 38 | } 39 | 40 | 41 | //* regex methods 42 | 43 | // RepFunc replaces a string with the result of a function 44 | // 45 | // similar to JavaScript .replace(/re/, function(data){}) 46 | func (reg *Regexp) ReplaceFunc(str []byte, rep func(data func(int) []byte) []byte, blank ...bool) []byte { 47 | return reg.reg.RepFunc(str, rep, blank...) 48 | } 49 | 50 | // ReplaceStringLiteral replaces a string with another string 51 | // 52 | // @rep uses the literal string, and does Not use args like $1 53 | func (reg *Regexp) ReplaceStringLiteral(str []byte, rep []byte) []byte { 54 | return reg.reg.RepStrLit(str, rep) 55 | } 56 | 57 | // ReplaceString is a more complex version of the RepStr method 58 | // 59 | // this function will replace things in the result like $1 with your capture groups 60 | // 61 | // use $0 to use the full regex capture group 62 | // 63 | // use ${123} to use numbers with more than one digit 64 | func (reg *Regexp) ReplaceString(str []byte, rep []byte) []byte { 65 | return reg.reg.RepStr(str, rep) 66 | } 67 | 68 | // Match returns true if a []byte matches a regex 69 | func (reg *Regexp) Match(str []byte) bool { 70 | return reg.reg.Match(str) 71 | } 72 | 73 | // Split splits a string, and keeps capture groups 74 | // 75 | // Similar to JavaScript .split(/re/) 76 | func (reg *Regexp) Split(str []byte) [][]byte { 77 | return reg.reg.Split(str) 78 | } 79 | 80 | 81 | //* other regex methods 82 | 83 | // Escape will escape regex special chars 84 | func Escape(re string) string { 85 | return regex.Escape(re) 86 | } 87 | 88 | // IsValid will return true if a regex is valid and can compile 89 | func IsValid(str []byte) bool { 90 | if _, err := regexp.Compile(string(str)); err == nil { 91 | return true 92 | } 93 | return false 94 | } 95 | 96 | // JoinBytes is an easy way to join multiple values into a single []byte 97 | func JoinBytes(bytes ...interface{}) []byte { 98 | return common.JoinBytes(bytes...) 99 | } 100 | 101 | 102 | // ReplaceFileString replaces a regex match with a new []byte in a file 103 | // 104 | // @all: if true, will replace all text matching @re, 105 | // if false, will only replace the first occurrence 106 | func (reg *Regexp) ReplaceFileString(name string, rep []byte, all bool, maxReSize ...int64) error { 107 | stat, err := os.Stat(name) 108 | if err != nil || stat.IsDir() { 109 | return err 110 | } 111 | 112 | file, err := os.OpenFile(name, os.O_RDWR, stat.Mode().Perm()) 113 | if err != nil { 114 | return err 115 | } 116 | defer file.Close() 117 | 118 | var found bool 119 | 120 | l := int64(reg.len * 10) 121 | if l < 1024 { 122 | l = 1024 123 | } 124 | for _, maxRe := range maxReSize { 125 | if l < maxRe { 126 | l = maxRe 127 | } 128 | } 129 | 130 | i := int64(0) 131 | 132 | buf := make([]byte, l) 133 | size, err := file.ReadAt(buf, i) 134 | buf = buf[:size] 135 | for err == nil { 136 | if reg.Match(buf) { 137 | found = true 138 | 139 | repRes := reg.ReplaceString(buf, rep) 140 | 141 | rl := int64(len(repRes)) 142 | if rl == l { 143 | file.WriteAt(repRes, i) 144 | file.Sync() 145 | }else if rl < l { 146 | file.WriteAt(repRes, i) 147 | rl = l - rl 148 | 149 | j := i+l 150 | 151 | b := make([]byte, 1024) 152 | s, e := file.ReadAt(b, j) 153 | b = b[:s] 154 | 155 | for e == nil { 156 | file.WriteAt(b, j-rl) 157 | j += 1024 158 | b = make([]byte, 1024) 159 | s, e = file.ReadAt(b, j) 160 | b = b[:s] 161 | } 162 | 163 | if s != 0 { 164 | file.WriteAt(b, j-rl) 165 | j += int64(s) 166 | } 167 | 168 | file.Truncate(j-rl) 169 | file.Sync() 170 | }else if rl > l { 171 | rl -= l 172 | 173 | dif := int64(1024) 174 | if rl > dif { 175 | dif = rl 176 | } 177 | 178 | j := i+l 179 | 180 | b := make([]byte, dif) 181 | s, e := file.ReadAt(b, j) 182 | bw := b[:s] 183 | 184 | file.WriteAt(repRes, i) 185 | j += rl 186 | 187 | for e == nil { 188 | b = make([]byte, dif) 189 | s, e = file.ReadAt(b, j+dif-rl) 190 | 191 | file.WriteAt(bw, j) 192 | bw = b[:s] 193 | 194 | j += dif 195 | } 196 | 197 | file.WriteAt(bw, j) 198 | file.Sync() 199 | } 200 | 201 | if !all { 202 | file.Sync() 203 | file.Close() 204 | return nil 205 | } 206 | 207 | i += int64(len(repRes)) 208 | } 209 | 210 | i++ 211 | buf = make([]byte, l) 212 | size, err = file.ReadAt(buf, i) 213 | buf = buf[:size] 214 | } 215 | 216 | if reg.Match(buf) { 217 | found = true 218 | 219 | repRes := reg.ReplaceString(buf, rep) 220 | 221 | rl := int64(len(repRes)) 222 | if rl == l { 223 | file.WriteAt(repRes, i) 224 | file.Sync() 225 | }else if rl < l { 226 | file.WriteAt(repRes, i) 227 | rl = l - rl 228 | 229 | j := i+l 230 | 231 | b := make([]byte, 1024) 232 | s, e := file.ReadAt(b, j) 233 | b = b[:s] 234 | 235 | for e == nil { 236 | file.WriteAt(b, j-rl) 237 | j += 1024 238 | b = make([]byte, 1024) 239 | s, e = file.ReadAt(b, j) 240 | b = b[:s] 241 | } 242 | 243 | if s != 0 { 244 | file.WriteAt(b, j-rl) 245 | j += int64(s) 246 | } 247 | 248 | file.Truncate(j-rl) 249 | file.Sync() 250 | }else if rl > l { 251 | rl -= l 252 | 253 | dif := int64(1024) 254 | if rl > dif { 255 | dif = rl 256 | } 257 | 258 | j := i+l 259 | 260 | b := make([]byte, dif) 261 | s, e := file.ReadAt(b, j) 262 | bw := b[:s] 263 | 264 | file.WriteAt(repRes, i) 265 | j += rl 266 | 267 | for e == nil { 268 | b = make([]byte, dif) 269 | s, e = file.ReadAt(b, j+dif-rl) 270 | 271 | file.WriteAt(bw, j) 272 | bw = b[:s] 273 | 274 | j += dif 275 | } 276 | 277 | file.WriteAt(bw, j) 278 | file.Sync() 279 | } 280 | } 281 | 282 | file.Sync() 283 | file.Close() 284 | 285 | if !found { 286 | return io.EOF 287 | } 288 | return nil 289 | } 290 | 291 | // ReplaceFileFunc replaces a regex match with the result of a callback function in a file 292 | // 293 | // @all: if true, will replace all text matching @re, 294 | // if false, will only replace the first occurrence 295 | func (reg *Regexp) ReplaceFileFunc(name string, rep func(data func(int) []byte) []byte, all bool, maxReSize ...int64) error { 296 | stat, err := os.Stat(name) 297 | if err != nil || stat.IsDir() { 298 | return err 299 | } 300 | 301 | file, err := os.OpenFile(name, os.O_RDWR, stat.Mode().Perm()) 302 | if err != nil { 303 | return err 304 | } 305 | defer file.Close() 306 | 307 | var found bool 308 | 309 | l := int64(reg.len * 10) 310 | if l < 1024 { 311 | l = 1024 312 | } 313 | for _, maxRe := range maxReSize { 314 | if l < maxRe { 315 | l = maxRe 316 | } 317 | } 318 | 319 | i := int64(0) 320 | 321 | buf := make([]byte, l) 322 | size, err := file.ReadAt(buf, i) 323 | buf = buf[:size] 324 | for err == nil { 325 | if reg.Match(buf) { 326 | found = true 327 | 328 | repRes := reg.ReplaceFunc(buf, rep) 329 | 330 | rl := int64(len(repRes)) 331 | if rl == l { 332 | file.WriteAt(repRes, i) 333 | file.Sync() 334 | }else if rl < l { 335 | file.WriteAt(repRes, i) 336 | rl = l - rl 337 | 338 | j := i+l 339 | 340 | b := make([]byte, 1024) 341 | s, e := file.ReadAt(b, j) 342 | b = b[:s] 343 | 344 | for e == nil { 345 | file.WriteAt(b, j-rl) 346 | j += 1024 347 | b = make([]byte, 1024) 348 | s, e = file.ReadAt(b, j) 349 | b = b[:s] 350 | } 351 | 352 | if s != 0 { 353 | file.WriteAt(b, j-rl) 354 | j += int64(s) 355 | } 356 | 357 | file.Truncate(j-rl) 358 | file.Sync() 359 | }else if rl > l { 360 | rl -= l 361 | 362 | dif := int64(1024) 363 | if rl > dif { 364 | dif = rl 365 | } 366 | 367 | j := i+l 368 | 369 | b := make([]byte, dif) 370 | s, e := file.ReadAt(b, j) 371 | bw := b[:s] 372 | 373 | file.WriteAt(repRes, i) 374 | j += rl 375 | 376 | for e == nil { 377 | b = make([]byte, dif) 378 | s, e = file.ReadAt(b, j+dif-rl) 379 | 380 | file.WriteAt(bw, j) 381 | bw = b[:s] 382 | 383 | j += dif 384 | } 385 | 386 | file.WriteAt(bw, j) 387 | file.Sync() 388 | } 389 | 390 | if !all { 391 | file.Sync() 392 | file.Close() 393 | return nil 394 | } 395 | 396 | i += int64(len(repRes)) 397 | } 398 | 399 | i++ 400 | buf = make([]byte, l) 401 | size, err = file.ReadAt(buf, i) 402 | buf = buf[:size] 403 | } 404 | 405 | if reg.Match(buf) { 406 | found = true 407 | 408 | repRes := reg.ReplaceFunc(buf, rep) 409 | 410 | rl := int64(len(repRes)) 411 | if rl == l { 412 | file.WriteAt(repRes, i) 413 | file.Sync() 414 | }else if rl < l { 415 | file.WriteAt(repRes, i) 416 | rl = l - rl 417 | 418 | j := i+l 419 | 420 | b := make([]byte, 1024) 421 | s, e := file.ReadAt(b, j) 422 | b = b[:s] 423 | 424 | for e == nil { 425 | file.WriteAt(b, j-rl) 426 | j += 1024 427 | b = make([]byte, 1024) 428 | s, e = file.ReadAt(b, j) 429 | b = b[:s] 430 | } 431 | 432 | if s != 0 { 433 | file.WriteAt(b, j-rl) 434 | j += int64(s) 435 | } 436 | 437 | file.Truncate(j-rl) 438 | file.Sync() 439 | }else if rl > l { 440 | rl -= l 441 | 442 | dif := int64(1024) 443 | if rl > dif { 444 | dif = rl 445 | } 446 | 447 | j := i+l 448 | 449 | b := make([]byte, dif) 450 | s, e := file.ReadAt(b, j) 451 | bw := b[:s] 452 | 453 | file.WriteAt(repRes, i) 454 | j += rl 455 | 456 | for e == nil { 457 | b = make([]byte, dif) 458 | s, e = file.ReadAt(b, j+dif-rl) 459 | 460 | file.WriteAt(bw, j) 461 | bw = b[:s] 462 | 463 | j += dif 464 | } 465 | 466 | file.WriteAt(bw, j) 467 | file.Sync() 468 | } 469 | } 470 | 471 | file.Sync() 472 | file.Close() 473 | 474 | if !found { 475 | return io.EOF 476 | } 477 | return nil 478 | } 479 | --------------------------------------------------------------------------------