├── .gitignore ├── LICENSE ├── README.md ├── ahocorasick.go ├── ahocorasick_test.go ├── go.mod └── go.sum /.gitignore: -------------------------------------------------------------------------------- 1 | biblio.test 2 | *.pprof 3 | *.pdf 4 | sandbox/ 5 | cmd/cmd 6 | main 7 | biblio_grep_rg_benchmarks.* 8 | ~$biblio_grep_rg_benchmarks.xlsx 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2019 Adam P. Regasz-Rethy 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ahocorasick 2 | 3 | The fastest Golang implementation of the Aho-Corasick algorithm for string-searching. 4 | 5 | ## Usage 6 | 7 | ```bash 8 | go get github.com/rrethy/ahocorasick@v1.0.0 9 | ``` 10 | 11 | [Documentation](https://godoc.org/github.com/RRethy/ahocorasick) 12 | 13 | ```go 14 | matcher := CompileByteSlices([][]byte{ 15 | []byte("he"), 16 | []byte("she"), 17 | []byte("his"), 18 | []byte("hers"), 19 | []byte("she"), 20 | }) 21 | fmt.Print(matcher.FindAllByteSlice([]byte("ushers"))) 22 | 23 | // Output: 24 | // [{ "he" 2 } { "she" 1 } { "she" 1 } { "hers" 2 }] 25 | ``` 26 | 27 | ```go 28 | matcher := CompileStrings([]string{ 29 | "he", 30 | "she", 31 | "his", 32 | "hers", 33 | "she", 34 | }) 35 | fmt.Print(matcher.FindAllString("ushers")) 36 | } 37 | 38 | // Output: 39 | // [{ "he" 2 } { "she" 1 } { "she" 1 } { "hers" 2 }] 40 | ``` 41 | 42 | ## Benchmarks 43 | 44 | *macOS Mojave version 10.14.6* 45 | 46 | *MacBook Pro (Retina, 13-inch, Early 2015)* 47 | 48 | *Processor 3.1 GHz Intel Core i7* 49 | 50 | 51 | ``` 52 | $ git co d7354e5e7912add9c2c602aae74c508bca3b2f4d; go test -bench=Benchmark 53 | ``` 54 | 55 | The two basic operations are the compilation of the state machine from an array of patterns (`Compile`), and the usage of this state machine to find each pattern in text (`FindAll`). Other implementations call these operations under different names. 56 | 57 | | Operation | Input Size | rrethy/ahocorasick | [BobuSumisu/aho-corasick](https://github.com/BobuSumisu/aho-corasick) | [anknown/ahocorasick](https://github.com/anknown/ahocorasick) | 58 | | - | - | - | - | - | 59 | | - | - | Double-Array Trie | LinkedList Trie | Double-Array Trie | 60 | | - | - | - | - | - | 61 | | `Compile` | 235886 patterns | **133 ms** | 214 ms | 1408 ms | 62 | | `Compile` | 23589 patterns | **20 ms** | 50 ms | 137 ms | 63 | | `Compile` | 2359 patterns | **3320 µs** | 11026 µs | 10506 µs | 64 | | `Compile` | 236 patterns | **229 µs**| 1377 µs| 867s µs | 65 | | `Compile` | 24 patterns | **43 µs**| 144 µs| 82s µs | 66 | | - | - | - | - | - | 67 | | `FindAll` | 3227439 bytes | **36 ms** | 38 ms | 116 ms | 68 | | `FindAll` | 318647 bytes | **3641 µs** | 3764 µs | 11335 µs | 69 | | `FindAll` | 31626 bytes | **359 µs** | 370 µs | 1103 µs | 70 | | `FindAll` | 3657 bytes | **31 µs** | 40 µs | 131 µs | 71 | 72 | **NOTE**: `FindAll` uses a state machine compiled from 2359 patterns. 73 | 74 | **NOTE**: `FindAll` time does **not** include the `Compile` time for the state machine. 75 | 76 | ### Reference Papers 77 | 78 | [1] A. V. Aho, M. J. Corasick, "Efficient String Matching: An Aid to Bibliographic Search," Communications of the ACM, vol. 18, no. 6, pp. 333-340, June 1975. 79 | 80 | [2] J.I. Aoe, "An Efficient Digital Search Algorithm by Using a Doble-Array Structure," IEEE Transactions on Software Engineering, vol. 15, no. 9, pp. 1066-1077, September 1989. 81 | 82 | [3] J.I. Aoe, K. Morimoto, T. Sato, "An Efficient Implementation of Trie Stuctures," Software - Practice and Experience, vol. 22, no.9, pp. 695-721, September 1992. 83 | 84 | ## License 85 | 86 | `MIT` 87 | -------------------------------------------------------------------------------- /ahocorasick.go: -------------------------------------------------------------------------------- 1 | // Package ahocorasick implements the Aho-Corasick string matching algorithm for 2 | // efficiently finding all instances of multiple patterns in a text. 3 | package ahocorasick 4 | 5 | import ( 6 | "bytes" 7 | "fmt" 8 | "sort" 9 | ) 10 | 11 | const ( 12 | // leaf represents a leaf on the trie 13 | // This must be <255 since the offsets used are in [0,255] 14 | // This should only appear in the Base array since the Check array uses 15 | // negative values to represent free states. 16 | leaf = -1867 17 | ) 18 | 19 | // Matcher is the pattern matching state machine. 20 | type Matcher struct { 21 | base []int // base array in the double array trie 22 | check []int // check array in the double array trie 23 | fail []int // fail function 24 | output [][]int // output function 25 | } 26 | 27 | func (m *Matcher) String() string { 28 | return fmt.Sprintf(` 29 | Base: %v 30 | Check: %v 31 | Fail: %v 32 | Output: %v 33 | `, m.base, m.check, m.fail, m.output) 34 | } 35 | 36 | type byteSliceSlice [][]byte 37 | 38 | func (bss byteSliceSlice) Len() int { return len(bss) } 39 | func (bss byteSliceSlice) Less(i, j int) bool { return bytes.Compare(bss[i], bss[j]) < 1 } 40 | func (bss byteSliceSlice) Swap(i, j int) { bss[i], bss[j] = bss[j], bss[i] } 41 | 42 | func compile(words [][]byte) *Matcher { 43 | m := new(Matcher) 44 | m.base = make([]int, 2048)[:1] 45 | m.check = make([]int, 2048)[:1] 46 | m.fail = make([]int, 2048)[:1] 47 | m.output = make([][]int, 2048)[:1] 48 | 49 | sort.Sort(byteSliceSlice(words)) 50 | 51 | // Represents a node in the implicit trie of words 52 | type trienode struct { 53 | state int 54 | depth int 55 | start int 56 | end int 57 | } 58 | queue := make([]trienode, 2048)[:1] 59 | queue[0] = trienode{0, 0, 0, len(words)} 60 | 61 | for len(queue) > 0 { 62 | node := queue[0] 63 | queue = queue[1:] 64 | 65 | if node.end <= node.start { 66 | m.base[node.state] = leaf 67 | continue 68 | } 69 | 70 | var edges []byte 71 | for i := node.start; i < node.end; i++ { 72 | if len(edges) == 0 || edges[len(edges)-1] != words[i][node.depth] { 73 | edges = append(edges, words[i][node.depth]) 74 | } 75 | } 76 | 77 | // Calculate a suitable Base value where each edge will fit into the 78 | // double array trie 79 | base := m.findBase(edges) 80 | m.base[node.state] = base 81 | 82 | i := node.start 83 | for _, edge := range edges { 84 | offset := int(edge) 85 | newState := base + offset 86 | 87 | m.occupyState(newState, node.state) 88 | 89 | // level 0 and level 1 should fail to state 0 90 | if node.depth > 0 { 91 | m.setFailState(newState, node.state, offset) 92 | } 93 | m.unionFailOutput(newState, m.fail[newState]) 94 | 95 | // Add the child nodes to the queue to continue down the BFS 96 | newnode := trienode{newState, node.depth + 1, i, i} 97 | for { 98 | if newnode.depth >= len(words[i]) { 99 | m.output[newState] = append(m.output[newState], len(words[i])) 100 | newnode.start++ 101 | } 102 | newnode.end++ 103 | 104 | i++ 105 | if i >= node.end || words[i][node.depth] != edge { 106 | break 107 | } 108 | } 109 | queue = append(queue, newnode) 110 | } 111 | } 112 | 113 | return m 114 | } 115 | 116 | // CompileByteSlices compiles a Matcher from a slice of byte slices. This Matcher can be 117 | // used to find occurrences of each pattern in a text. 118 | func CompileByteSlices(words [][]byte) *Matcher { 119 | return compile(words) 120 | } 121 | 122 | // CompileStrings compiles a Matcher from a slice of strings. This Matcher can 123 | // be used to find occurrences of each pattern in a text. 124 | func CompileStrings(words []string) *Matcher { 125 | var wordByteSlices [][]byte 126 | for _, word := range words { 127 | wordByteSlices = append(wordByteSlices, []byte(word)) 128 | } 129 | return compile(wordByteSlices) 130 | } 131 | 132 | // occupyState will correctly occupy state so it maintains the 133 | // index=check[base[index]+offset] identity. It will also update the 134 | // bidirectional link of free states correctly. 135 | // Note: This MUST be used instead of simply modifying the check array directly 136 | // which is break the bidirectional link of free states. 137 | func (m *Matcher) occupyState(state, parentState int) { 138 | firstFreeState := m.firstFreeState() 139 | lastFreeState := m.lastFreeState() 140 | if firstFreeState == lastFreeState { 141 | m.check[0] = 0 142 | } else { 143 | switch state { 144 | case firstFreeState: 145 | next := -1 * m.check[state] 146 | m.check[0] = -1 * next 147 | m.base[next] = m.base[state] 148 | case lastFreeState: 149 | prev := -1 * m.base[state] 150 | m.base[firstFreeState] = -1 * prev 151 | m.check[prev] = -1 152 | default: 153 | next := -1 * m.check[state] 154 | prev := -1 * m.base[state] 155 | m.check[prev] = -1 * next 156 | m.base[next] = -1 * prev 157 | } 158 | } 159 | m.check[state] = parentState 160 | m.base[state] = leaf 161 | } 162 | 163 | // setFailState sets the output of the fail function for input state. It will 164 | // traverse up the fail states of it's ancestors until it reaches a fail state 165 | // with a transition for offset. 166 | func (m *Matcher) setFailState(state, parentState, offset int) { 167 | failState := m.fail[parentState] 168 | for { 169 | if m.hasEdge(failState, offset) { 170 | m.fail[state] = m.base[failState] + offset 171 | break 172 | } 173 | if failState == 0 { 174 | break 175 | } 176 | failState = m.fail[failState] 177 | } 178 | } 179 | 180 | // unionFailOutput unions the output function for failState with the output 181 | // function for state and sets the result as the output function for state. 182 | // This allows us to match substrings, commenting out this body would match 183 | // every word that is not a substring. 184 | func (m *Matcher) unionFailOutput(state, failState int) { 185 | m.output[state] = append([]int{}, m.output[failState]...) 186 | } 187 | 188 | // findBase finds a base value which has free states in the positions that 189 | // correspond to each edge transition in edges. If this does not exist, then 190 | // base and check (and the fail array for consistency) will be extended just 191 | // enough to fit each transition. 192 | // The extension will maintain the bidirectional link of free states. 193 | func (m *Matcher) findBase(edges []byte) int { 194 | if len(edges) == 0 { 195 | return leaf 196 | } 197 | 198 | min := int(edges[0]) 199 | max := int(edges[len(edges)-1]) 200 | width := max - min 201 | freeState := m.firstFreeState() 202 | for freeState != -1 { 203 | valid := true 204 | for _, e := range edges[1:] { 205 | state := freeState + int(e) - min 206 | if state >= len(m.check) { 207 | break 208 | } else if m.check[state] >= 0 { 209 | valid = false 210 | break 211 | } 212 | } 213 | 214 | if valid { 215 | if freeState+width >= len(m.check) { 216 | m.increaseSize(width - len(m.check) + freeState + 1) 217 | } 218 | return freeState - min 219 | } 220 | 221 | freeState = m.nextFreeState(freeState) 222 | } 223 | freeState = len(m.check) 224 | m.increaseSize(width + 1) 225 | return freeState - min 226 | } 227 | 228 | // increaseSize increases the size of base, check, and fail to ensure they 229 | // remain the same size. 230 | // It also sets the default value for these new unoccupied states which form 231 | // bidirectional links to allow fast access to empty states. These 232 | // bidirectional links only pertain to base and check. 233 | // 234 | // Example: 235 | // m: 236 | // 237 | // base: [ 5 0 0 ] 238 | // check: [ 0 0 0 ] 239 | // 240 | // increaseSize(3): 241 | // 242 | // base: [ 5 0 0 -5 -3 -4 ] 243 | // check: [ -3 0 0 -4 -5 -1 ] 244 | // 245 | // increaseSize(3): 246 | // 247 | // base: [ 5 0 0 -8 -3 -4 -5 -6 -7] 248 | // check: [ -3 0 0 -4 -5 -6 -7 -8 -1] 249 | // 250 | // m: 251 | // 252 | // base: [ 5 0 0 ] 253 | // check: [ 0 0 0 ] 254 | // 255 | // increaseSize(1): 256 | // 257 | // base: [ 5 0 0 -3 ] 258 | // check: [ -3 0 0 -1 ] 259 | // 260 | // increaseSize(1): 261 | // 262 | // base: [ 5 0 0 -4 -3 ] 263 | // check: [ -3 0 0 -4 -1 ] 264 | // 265 | // increaseSize(1): 266 | // 267 | // base: [ 5 0 0 -5 -3 -4 ] 268 | // check: [ -3 0 0 -4 -5 -1 ] 269 | func (m *Matcher) increaseSize(dsize int) { 270 | if dsize == 0 { 271 | return 272 | } 273 | 274 | m.base = append(m.base, make([]int, dsize)...) 275 | m.check = append(m.check, make([]int, dsize)...) 276 | m.fail = append(m.fail, make([]int, dsize)...) 277 | m.output = append(m.output, make([][]int, dsize)...) 278 | 279 | lastFreeState := m.lastFreeState() 280 | firstFreeState := m.firstFreeState() 281 | for i := len(m.check) - dsize; i < len(m.check); i++ { 282 | if lastFreeState == -1 { 283 | m.check[0] = -1 * i 284 | m.base[i] = -1 * i 285 | m.check[i] = -1 286 | firstFreeState = i 287 | lastFreeState = i 288 | } else { 289 | m.base[i] = -1 * lastFreeState 290 | m.check[i] = -1 291 | m.base[firstFreeState] = -1 * i 292 | m.check[lastFreeState] = -1 * i 293 | lastFreeState = i 294 | } 295 | } 296 | } 297 | 298 | // nextFreeState uses the nature of the bidirectional link to determine the 299 | // closest free state at a larger index. Since the check array holds the 300 | // negative index of the next free state, except for the last free state which 301 | // has a value of -1, negating this value is the next free state. 302 | func (m *Matcher) nextFreeState(curFreeState int) int { 303 | nextState := -1 * m.check[curFreeState] 304 | 305 | // state 1 can never be a free state. 306 | if nextState == 1 { 307 | return -1 308 | } 309 | 310 | return nextState 311 | } 312 | 313 | // firstFreeState uses the first value in the check array which points to the 314 | // first free state. A value of 0 means there are no free states and -1 is 315 | // returned. 316 | func (m *Matcher) firstFreeState() int { 317 | state := m.check[0] 318 | if state != 0 { 319 | return -1 * state 320 | } 321 | return -1 322 | } 323 | 324 | // lastFreeState uses the base value of the first free state which points the 325 | // last free state. 326 | func (m *Matcher) lastFreeState() int { 327 | firstFree := m.firstFreeState() 328 | if firstFree != -1 { 329 | return -1 * m.base[firstFree] 330 | } 331 | return -1 332 | } 333 | 334 | // hasEdge determines if the fromState has a transition for offset. 335 | func (m *Matcher) hasEdge(fromState, offset int) bool { 336 | toState := m.base[fromState] + offset 337 | return toState > 0 && toState < len(m.check) && m.check[toState] == fromState 338 | } 339 | 340 | // Match represents a matched pattern in the text 341 | type Match struct { 342 | Word []byte // the matched pattern 343 | Index int // the start index of the match 344 | } 345 | 346 | func (m *Match) String() string { 347 | return fmt.Sprintf(`{ "%s" %d }`, m.Word, m.Index) 348 | } 349 | 350 | func (m *Matcher) findAll(text []byte) []*Match { 351 | var matches []*Match 352 | state := 0 353 | for i, b := range text { 354 | offset := int(b) 355 | for state != 0 && !m.hasEdge(state, offset) { 356 | state = m.fail[state] 357 | } 358 | 359 | if m.hasEdge(state, offset) { 360 | state = m.base[state] + offset 361 | } 362 | for _, wordlen := range m.output[state] { 363 | matches = append(matches, &Match{text[i-wordlen+1 : i+1], i - wordlen + 1}) 364 | } 365 | } 366 | return matches 367 | } 368 | 369 | // FindAllByteSlice finds all instances of the patterns in the text. 370 | func (m *Matcher) FindAllByteSlice(text []byte) (matches []*Match) { 371 | return m.findAll(text) 372 | } 373 | 374 | // FindAllString finds all instances of the patterns in the text. 375 | func (m *Matcher) FindAllString(text string) []*Match { 376 | return m.FindAllByteSlice([]byte(text)) 377 | } 378 | -------------------------------------------------------------------------------- /ahocorasick_test.go: -------------------------------------------------------------------------------- 1 | package ahocorasick 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "testing" 7 | ) 8 | 9 | func convert(got []*Match) []Match { 10 | var converted []Match 11 | for _, matchptr := range got { 12 | converted = append(converted, *matchptr) 13 | } 14 | return converted 15 | } 16 | 17 | func TestFindAllByteSlice(t *testing.T) { 18 | m := compile([][]byte{ 19 | []byte("he"), 20 | []byte("his"), 21 | []byte("hers"), 22 | []byte("she")}, 23 | ) 24 | m.findAll([]byte("ushers")) // => { "she" 1 }, { "he" 2}, { "hers" 2 } 25 | tests := []struct { 26 | patterns [][]byte 27 | expected []Match 28 | text []byte 29 | }{ 30 | { 31 | [][]byte{[]byte("na"), []byte("ink"), []byte("ki")}, 32 | []Match{{[]byte("ink"), 0}, {[]byte("ki"), 2}}, 33 | []byte("inking"), 34 | }, 35 | { 36 | [][]byte{[]byte("ca"), []byte("erica"), []byte("rice")}, 37 | []Match{{[]byte("ca"), 3}, {[]byte("erica"), 0}}, 38 | []byte("erican"), 39 | }, 40 | { 41 | [][]byte{[]byte("he"), []byte("she"), []byte("his"), []byte("hers")}, 42 | []Match{{[]byte("he"), 2}, {[]byte("she"), 1}, {[]byte("hers"), 2}}, 43 | []byte("ushers"), 44 | }, 45 | { 46 | [][]byte{[]byte("they"), []byte("their"), []byte("theyre"), []byte("the"), []byte("tea"), []byte("te"), []byte("team"), []byte("go"), []byte("goo"), []byte("good"), []byte("oode")}, 47 | []Match{{[]byte("the"), 0}, {[]byte("they"), 0}, {[]byte("theyre"), 0}, {[]byte("go"), 13}, {[]byte("goo"), 13}, {[]byte("good"), 13}, {[]byte("oode"), 14}, {[]byte("te"), 19}, {[]byte("tea"), 19}, {[]byte("team"), 19}}, 48 | []byte("theyre not a goode team"), 49 | }, 50 | { 51 | [][]byte{[]byte("a")}, 52 | []Match{{[]byte("a"), 0}, {[]byte("a"), 1}, {[]byte("a"), 2}, {[]byte("a"), 5}, {[]byte("a"), 7}, {[]byte("a"), 9}, {[]byte("a"), 11}}, 53 | []byte("aaabbabababa"), 54 | }, 55 | { 56 | [][]byte{}, 57 | []Match{}, 58 | []byte("there is no patterns"), 59 | }, 60 | { 61 | [][]byte{[]byte("锅"), []byte("持有人"), []byte("potholderz"), []byte("MF DOOM")}, 62 | []Match{{[]byte("potholderz"), 0}, {[]byte("MF DOOM"), 14}, {[]byte("锅"), 39}, {[]byte("持有人"), 43}}, 63 | []byte("potholderz by MF DOOM hot shit aw shit 锅 持有人"), 64 | }, 65 | } 66 | for _, test := range tests { 67 | matcher := compile(test.patterns) 68 | got := matcher.findAll(test.text) 69 | gotConverted := convert(got) 70 | if !(len(got) == 0 && len(test.expected) == 0) && 71 | !reflect.DeepEqual(gotConverted, test.expected) { 72 | t.Errorf(` 73 | Text: %s 74 | Expected: %v 75 | Got: %v 76 | `, test.text, test.expected, gotConverted) 77 | } 78 | } 79 | } 80 | 81 | func TestIncreaseSize(t *testing.T) { 82 | m := &Matcher{ 83 | []int{5, 0, 0}, 84 | []int{0, 0, 0}, 85 | []int{0, 0, 0}, 86 | [][]int{}, 87 | } 88 | m.increaseSize(1) 89 | if !reflect.DeepEqual(m.base, []int{5, 0, 0, -3}) { 90 | t.Errorf("Got: %v\n", m.base) 91 | } 92 | if !reflect.DeepEqual(m.check, []int{-3, 0, 0, -1}) { 93 | t.Errorf("Got: %v\n", m.check) 94 | } 95 | 96 | m.increaseSize(1) 97 | if !reflect.DeepEqual(m.base, []int{5, 0, 0, -4, -3}) { 98 | t.Errorf("Got: %v\n", m.base) 99 | } 100 | if !reflect.DeepEqual(m.check, []int{-3, 0, 0, -4, -1}) { 101 | t.Errorf("Got: %v\n", m.check) 102 | } 103 | 104 | m.increaseSize(1) 105 | if !reflect.DeepEqual(m.base, []int{5, 0, 0, -5, -3, -4}) { 106 | t.Errorf("Got: %v\n", m.base) 107 | } 108 | if !reflect.DeepEqual(m.check, []int{-3, 0, 0, -4, -5, -1}) { 109 | t.Errorf("Got: %v\n", m.check) 110 | } 111 | 112 | m = &Matcher{ 113 | []int{5, 0, 0}, 114 | []int{0, 0, 0}, 115 | []int{0, 0, 0}, 116 | [][]int{}, 117 | } 118 | m.increaseSize(3) 119 | if !reflect.DeepEqual(m.base, []int{5, 0, 0, -5, -3, -4}) { 120 | t.Errorf("Got: %v\n", m.base) 121 | } 122 | if !reflect.DeepEqual(m.check, []int{-3, 0, 0, -4, -5, -1}) { 123 | t.Errorf("Got: %v\n", m.check) 124 | } 125 | 126 | m.increaseSize(3) 127 | if !reflect.DeepEqual(m.base, []int{5, 0, 0, -8, -3, -4, -5, -6, -7}) { 128 | t.Errorf("Got: %v\n", m.base) 129 | } 130 | if !reflect.DeepEqual(m.check, []int{-3, 0, 0, -4, -5, -6, -7, -8, -1}) { 131 | t.Errorf("Got: %v\n", m.check) 132 | } 133 | 134 | m = &Matcher{ 135 | []int{0}, 136 | []int{0}, 137 | []int{0}, 138 | [][]int{}, 139 | } 140 | m.increaseSize(5) 141 | if !reflect.DeepEqual(m.base, []int{0, -5, -1, -2, -3, -4}) { 142 | t.Errorf("Got: %v\n", m.base) 143 | } 144 | if !reflect.DeepEqual(m.check, []int{-1, -2, -3, -4, -5, -1}) { 145 | t.Errorf("Got: %v\n", m.check) 146 | } 147 | 148 | m = &Matcher{ 149 | []int{-103, -1867}, 150 | []int{0, 0}, 151 | []int{}, 152 | [][]int{}, 153 | } 154 | m.increaseSize(5) 155 | if !reflect.DeepEqual(m.base, []int{-103, -1867, -6, -2, -3, -4, -5}) { 156 | t.Errorf("Got: %v\n", m.base) 157 | } 158 | if !reflect.DeepEqual(m.check, []int{-2, 0, -3, -4, -5, -6, -1}) { 159 | t.Errorf("Got: %v\n", m.check) 160 | } 161 | } 162 | 163 | func TestNextFreeState(t *testing.T) { 164 | m := &Matcher{ 165 | []int{5, 0, 0, -3}, 166 | []int{-3, 0, 0, -1}, 167 | []int{}, 168 | [][]int{}, 169 | } 170 | nextState := m.nextFreeState(3) 171 | if nextState != -1 { 172 | t.Errorf("Got: %d\n", nextState) 173 | } 174 | 175 | m.increaseSize(3) 176 | nextState = m.nextFreeState(3) 177 | if nextState != 4 { 178 | t.Errorf("Got: %d\n", nextState) 179 | } 180 | } 181 | 182 | func TestOccupyState(t *testing.T) { 183 | m := &Matcher{ 184 | []int{5, 0, 0, -3}, 185 | []int{-3, 0, 0, -1}, 186 | []int{}, 187 | [][]int{}, 188 | } 189 | m.increaseSize(5) 190 | m.occupyState(3, 1) 191 | m.occupyState(4, 1) 192 | m.occupyState(8, 1) 193 | m.occupyState(6, 1) 194 | m.occupyState(5, 1) 195 | m.occupyState(7, 1) 196 | if !reflect.DeepEqual(m.base, []int{5, 0, 0, -1867, -1867, -1867, -1867, -1867, -1867}) { 197 | t.Errorf("Got: %v\n", m.base) 198 | } 199 | if !reflect.DeepEqual(m.check, []int{0, 0, 0, 1, 1, 1, 1, 1, 1}) { 200 | t.Errorf("Got: %v\n", m.check) 201 | } 202 | } 203 | 204 | func ExampleMatcher_FindAllByteSlice() { 205 | matcher := CompileByteSlices([][]byte{ 206 | []byte("he"), 207 | []byte("she"), 208 | []byte("his"), 209 | []byte("hers"), 210 | []byte("she"), 211 | }) 212 | fmt.Print(matcher.FindAllByteSlice([]byte("ushers"))) 213 | 214 | // Output: 215 | // [{ "he" 2 } { "she" 1 } { "she" 1 } { "hers" 2 }] 216 | } 217 | 218 | func ExampleMatcher_FindAllString() { 219 | matcher := CompileStrings([]string{ 220 | "he", 221 | "she", 222 | "his", 223 | "hers", 224 | "she", 225 | }) 226 | fmt.Print(matcher.FindAllString("ushers")) 227 | 228 | // Output: 229 | // [{ "he" 2 } { "she" 1 } { "she" 1 } { "hers" 2 }] 230 | } 231 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/rrethy/ahocorasick 2 | 3 | go 1.19 4 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RRethy/ahocorasick/08c7d453a072a4d8528ff42708a16758d45d7a8d/go.sum --------------------------------------------------------------------------------