├── .gitignore
├── LICENSE
├── README.md
├── ahocorasick.go
├── ahocorasick_test.go
├── go.mod
└── go.sum


/.gitignore:
--------------------------------------------------------------------------------
1 | biblio.test
2 | *.pprof
3 | *.pdf
4 | sandbox/
5 | cmd/cmd
6 | main
7 | biblio_grep_rg_benchmarks.*
8 | ~$biblio_grep_rg_benchmarks.xlsx
9 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2019 Adam P. Regasz-Rethy
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 | 
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 | 
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ahocorasick
 2 | 
 3 | The fastest Golang implementation of the Aho-Corasick algorithm for string-searching.
 4 | 
 5 | ## Usage
 6 | 
 7 | ```bash
 8 | go get github.com/rrethy/ahocorasick@v1.0.0
 9 | ```
10 | 
11 | [Documentation](https://godoc.org/github.com/RRethy/ahocorasick)
12 | 
13 | ```go
14 | matcher := CompileByteSlices([][]byte{
15 | 	[]byte("he"),
16 | 	[]byte("she"),
17 | 	[]byte("his"),
18 | 	[]byte("hers"),
19 | 	[]byte("she"),
20 | })
21 | fmt.Print(matcher.FindAllByteSlice([]byte("ushers")))
22 | 
23 | // Output:
24 | // [{ "he" 2 } { "she" 1 } { "she" 1 } { "hers" 2 }]
25 | ```
26 | 
27 | ```go
28 | matcher := CompileStrings([]string{
29 | 	"he",
30 | 	"she",
31 | 	"his",
32 | 	"hers",
33 | 	"she",
34 | })
35 | fmt.Print(matcher.FindAllString("ushers"))
36 | 
37 | 
38 | // Output:
39 | // [{ "he" 2 } { "she" 1 } { "she" 1 } { "hers" 2 }]
40 | ```
41 | 
42 | ## Benchmarks
43 | 
44 | *macOS Mojave version 10.14.6*
45 | 
46 | *MacBook Pro (Retina, 13-inch, Early 2015)*
47 | 
48 | *Processor 3.1 GHz Intel Core i7*
49 | 
50 | 
51 | ```
52 | $ git checkout d7354e5e7912add9c2c602aae74c508bca3b2f4d; go test -bench=Benchmark
53 | ```
54 | 
55 | The two basic operations are the compilation of the state machine from an array of patterns (`Compile`), and the usage of this state machine to find each pattern in text (`FindAll`). Other implementations call these operations under different names.
56 | 
57 | | Operation | Input Size | rrethy/ahocorasick | [BobuSumisu/aho-corasick](https://github.com/BobuSumisu/aho-corasick) | [anknown/ahocorasick](https://github.com/anknown/ahocorasick) |
58 | | - | - | - | - | - |
59 | | - | - | Double-Array Trie | LinkedList Trie | Double-Array Trie |
60 | | - | - | - | - | - |
61 | | `Compile` | 235886 patterns | **133 ms** | 214 ms | 1408 ms |
62 | | `Compile` | 23589 patterns  | **20 ms** | 50 ms  | 137 ms |
63 | | `Compile` | 2359 patterns   | **3320 µs** | 11026 µs | 10506 µs |
64 | | `Compile` | 236 patterns    | **229 µs** | 1377 µs | 867 µs |
65 | | `Compile` | 24 patterns     | **43 µs** | 144 µs | 82 µs |
66 | | - | - | - | - | - |
67 | | `FindAll` | 3227439 bytes | **36 ms** | 38 ms | 116 ms |
68 | | `FindAll` | 318647 bytes  | **3641 µs** | 3764 µs | 11335 µs |
69 | | `FindAll` | 31626 bytes   | **359 µs** | 370 µs | 1103 µs |
70 | | `FindAll` | 3657 bytes    | **31 µs** | 40 µs | 131 µs |
71 | 
72 | **NOTE**: `FindAll` uses a state machine compiled from 2359 patterns.
73 | 
74 | **NOTE**: `FindAll` time does **not** include the `Compile` time for the state machine.
75 | 
76 | ### Reference Papers
77 | 
78 | [1] A. V. Aho, M. J. Corasick, "Efficient String Matching: An Aid to Bibliographic Search," Communications of the ACM, vol. 18, no. 6, pp. 333-340, June 1975.
79 | 
80 | [2] J.I. Aoe, "An Efficient Digital Search Algorithm by Using a Double-Array Structure," IEEE Transactions on Software Engineering, vol. 15, no. 9, pp. 1066-1077, September 1989.
81 | 
82 | [3] J.I. Aoe, K. Morimoto, T. Sato, "An Efficient Implementation of Trie Structures," Software - Practice and Experience, vol. 22, no. 9, pp. 695-721, September 1992.
83 | 
84 | ## License
85 | 
86 | `MIT`
87 | 


--------------------------------------------------------------------------------
/ahocorasick.go:
--------------------------------------------------------------------------------
  1 | // Package ahocorasick implements the Aho-Corasick string matching algorithm for
  2 | // efficiently finding all instances of multiple patterns in a text.
  3 | package ahocorasick
  4 | 
  5 | import (
  6 | 	"bytes"
  7 | 	"fmt"
  8 | 	"sort"
  9 | )
 10 | 
const (
	// leaf is the Base value stored for a state that has no outgoing
	// transitions, i.e. a leaf of the trie.
	// It must be less than -255: transition offsets are byte values in
	// [0,255], so leaf+offset is always negative and hasEdge, which requires a
	// positive target state, never reports an edge out of a leaf.
	// It should only appear in the Base array since the Check array uses
	// negative values to represent free states.
	leaf = -1867
)
 18 | 
 19 | // Matcher is the pattern matching state machine.
 20 | type Matcher struct {
 21 | 	base   []int   // base array in the double array trie
 22 | 	check  []int   // check array in the double array trie
 23 | 	fail   []int   // fail function
 24 | 	output [][]int // output function
 25 | }
 26 | 
 27 | func (m *Matcher) String() string {
 28 | 	return fmt.Sprintf(`
 29 | Base:   %v
 30 | Check:  %v
 31 | Fail:   %v
 32 | Output: %v
 33 | `, m.base, m.check, m.fail, m.output)
 34 | }
 35 | 
 36 | type byteSliceSlice [][]byte
 37 | 
 38 | func (bss byteSliceSlice) Len() int           { return len(bss) }
 39 | func (bss byteSliceSlice) Less(i, j int) bool { return bytes.Compare(bss[i], bss[j]) < 1 }
 40 | func (bss byteSliceSlice) Swap(i, j int)      { bss[i], bss[j] = bss[j], bss[i] }
 41 | 
 42 | func compile(words [][]byte) *Matcher {
 43 | 	m := new(Matcher)
 44 | 	m.base = make([]int, 2048)[:1]
 45 | 	m.check = make([]int, 2048)[:1]
 46 | 	m.fail = make([]int, 2048)[:1]
 47 | 	m.output = make([][]int, 2048)[:1]
 48 | 
 49 | 	sort.Sort(byteSliceSlice(words))
 50 | 
 51 | 	// Represents a node in the implicit trie of words
 52 | 	type trienode struct {
 53 | 		state int
 54 | 		depth int
 55 | 		start int
 56 | 		end   int
 57 | 	}
 58 | 	queue := make([]trienode, 2048)[:1]
 59 | 	queue[0] = trienode{0, 0, 0, len(words)}
 60 | 
 61 | 	for len(queue) > 0 {
 62 | 		node := queue[0]
 63 | 		queue = queue[1:]
 64 | 
 65 | 		if node.end <= node.start {
 66 | 			m.base[node.state] = leaf
 67 | 			continue
 68 | 		}
 69 | 
 70 | 		var edges []byte
 71 | 		for i := node.start; i < node.end; i++ {
 72 | 			if len(edges) == 0 || edges[len(edges)-1] != words[i][node.depth] {
 73 | 				edges = append(edges, words[i][node.depth])
 74 | 			}
 75 | 		}
 76 | 
 77 | 		// Calculate a suitable Base value where each edge will fit into the
 78 | 		// double array trie
 79 | 		base := m.findBase(edges)
 80 | 		m.base[node.state] = base
 81 | 
 82 | 		i := node.start
 83 | 		for _, edge := range edges {
 84 | 			offset := int(edge)
 85 | 			newState := base + offset
 86 | 
 87 | 			m.occupyState(newState, node.state)
 88 | 
 89 | 			// level 0 and level 1 should fail to state 0
 90 | 			if node.depth > 0 {
 91 | 				m.setFailState(newState, node.state, offset)
 92 | 			}
 93 | 			m.unionFailOutput(newState, m.fail[newState])
 94 | 
 95 | 			// Add the child nodes to the queue to continue down the BFS
 96 | 			newnode := trienode{newState, node.depth + 1, i, i}
 97 | 			for {
 98 | 				if newnode.depth >= len(words[i]) {
 99 | 					m.output[newState] = append(m.output[newState], len(words[i]))
100 | 					newnode.start++
101 | 				}
102 | 				newnode.end++
103 | 
104 | 				i++
105 | 				if i >= node.end || words[i][node.depth] != edge {
106 | 					break
107 | 				}
108 | 			}
109 | 			queue = append(queue, newnode)
110 | 		}
111 | 	}
112 | 
113 | 	return m
114 | }
115 | 
// CompileByteSlices compiles a Matcher from a slice of byte slices. This Matcher can be
// used to find occurrences of each pattern in a text.
//
// It is a thin wrapper that passes words straight to compile. A pattern that
// appears more than once in words is reported once per copy for every
// occurrence in the text.
func CompileByteSlices(words [][]byte) *Matcher {
	return compile(words)
}
121 | 
122 | // CompileStrings compiles a Matcher from a slice of strings. This Matcher can
123 | // be used to find occurrences of each pattern in a text.
124 | func CompileStrings(words []string) *Matcher {
125 | 	var wordByteSlices [][]byte
126 | 	for _, word := range words {
127 | 		wordByteSlices = append(wordByteSlices, []byte(word))
128 | 	}
129 | 	return compile(wordByteSlices)
130 | }
131 | 
// occupyState will correctly occupy state so it maintains the
// index=check[base[index]+offset] identity. It will also update the
// bidirectional link of free states correctly.
//
// Free states form a doubly linked list: check[0] holds the negated index of
// the first free state (0 when there is none), check[s] holds the negated
// index of the next free state (-1 for the last one), and base[s] holds the
// negated index of the previous free state, with the first free state's base
// pointing at the last free state.
//
// Note: This MUST be used instead of simply modifying the check array directly
// which would break the bidirectional link of free states.
func (m *Matcher) occupyState(state, parentState int) {
	firstFreeState := m.firstFreeState()
	lastFreeState := m.lastFreeState()
	if firstFreeState == lastFreeState {
		// At most one free state is left, so the list becomes empty.
		m.check[0] = 0
	} else {
		switch state {
		case firstFreeState:
			// Removing the head: the next free state becomes the head and
			// inherits the head's link to the last free state.
			next := -1 * m.check[state]
			m.check[0] = -1 * next
			m.base[next] = m.base[state]
		case lastFreeState:
			// Removing the tail: the previous free state becomes the tail,
			// and the head's back link is pointed at it.
			prev := -1 * m.base[state]
			m.base[firstFreeState] = -1 * prev
			m.check[prev] = -1
		default:
			// Removing an interior entry: link its neighbours to each other.
			next := -1 * m.check[state]
			prev := -1 * m.base[state]
			m.check[prev] = -1 * next
			m.base[next] = -1 * prev
		}
	}
	// The state is now owned by parentState and starts out without children.
	m.check[state] = parentState
	m.base[state] = leaf
}
162 | 
163 | // setFailState sets the output of the fail function for input state. It will
164 | // traverse up the fail states of it's ancestors until it reaches a fail state
165 | // with a transition for offset.
166 | func (m *Matcher) setFailState(state, parentState, offset int) {
167 | 	failState := m.fail[parentState]
168 | 	for {
169 | 		if m.hasEdge(failState, offset) {
170 | 			m.fail[state] = m.base[failState] + offset
171 | 			break
172 | 		}
173 | 		if failState == 0 {
174 | 			break
175 | 		}
176 | 		failState = m.fail[failState]
177 | 	}
178 | }
179 | 
180 | // unionFailOutput unions the output function for failState with the output
181 | // function for state and sets the result as the output function for state.
182 | // This allows us to match substrings, commenting out this body would match
183 | // every word that is not a substring.
184 | func (m *Matcher) unionFailOutput(state, failState int) {
185 | 	m.output[state] = append([]int{}, m.output[failState]...)
186 | }
187 | 
188 | // findBase finds a base value which has free states in the positions that
189 | // correspond to each edge transition in edges. If this does not exist, then
190 | // base and check (and the fail array for consistency) will be extended just
191 | // enough to fit each transition.
192 | // The extension will maintain the bidirectional link of free states.
193 | func (m *Matcher) findBase(edges []byte) int {
194 | 	if len(edges) == 0 {
195 | 		return leaf
196 | 	}
197 | 
198 | 	min := int(edges[0])
199 | 	max := int(edges[len(edges)-1])
200 | 	width := max - min
201 | 	freeState := m.firstFreeState()
202 | 	for freeState != -1 {
203 | 		valid := true
204 | 		for _, e := range edges[1:] {
205 | 			state := freeState + int(e) - min
206 | 			if state >= len(m.check) {
207 | 				break
208 | 			} else if m.check[state] >= 0 {
209 | 				valid = false
210 | 				break
211 | 			}
212 | 		}
213 | 
214 | 		if valid {
215 | 			if freeState+width >= len(m.check) {
216 | 				m.increaseSize(width - len(m.check) + freeState + 1)
217 | 			}
218 | 			return freeState - min
219 | 		}
220 | 
221 | 		freeState = m.nextFreeState(freeState)
222 | 	}
223 | 	freeState = len(m.check)
224 | 	m.increaseSize(width + 1)
225 | 	return freeState - min
226 | }
227 | 
// increaseSize increases the size of base, check, fail, and output by dsize
// to ensure they remain the same size. A dsize of 0 is a no-op.
// It also sets the default value for these new unoccupied states which form
// bidirectional links to allow fast access to empty states. These
// bidirectional links only pertain to base and check: check[s] is the negated
// index of the next free state (-1 for the last one), base[s] is the negated
// index of the previous free state (the first free state points back at the
// last one), and check[0] is the negated index of the first free state.
//
// Example:
// m:
//
//	base:  [ 5 0 0 ]
//	check: [ 0 0 0 ]
//
// increaseSize(3):
//
//	base:  [ 5  0 0 -5 -3 -4 ]
//	check: [ -3 0 0 -4 -5 -1 ]
//
// increaseSize(3):
//
//	base:  [ 5  0 0 -8 -3 -4 -5 -6 -7]
//	check: [ -3 0 0 -4 -5 -6 -7 -8 -1]
//
// m:
//
//	base:  [ 5 0 0 ]
//	check: [ 0 0 0 ]
//
// increaseSize(1):
//
//	base:  [ 5  0 0 -3 ]
//	check: [ -3 0 0 -1 ]
//
// increaseSize(1):
//
//	base:  [ 5  0 0 -4 -3 ]
//	check: [ -3 0 0 -4 -1 ]
//
// increaseSize(1):
//
//	base:  [ 5  0 0 -5 -3 -4 ]
//	check: [ -3 0 0 -4 -5 -1 ]
func (m *Matcher) increaseSize(dsize int) {
	if dsize == 0 {
		return
	}

	m.base = append(m.base, make([]int, dsize)...)
	m.check = append(m.check, make([]int, dsize)...)
	m.fail = append(m.fail, make([]int, dsize)...)
	m.output = append(m.output, make([][]int, dsize)...)

	// Append each new state to the tail of the free list, in index order.
	lastFreeState := m.lastFreeState()
	firstFreeState := m.firstFreeState()
	for i := len(m.check) - dsize; i < len(m.check); i++ {
		if lastFreeState == -1 {
			// The free list is empty: i becomes both its head and its tail,
			// so its back link points at itself.
			m.check[0] = -1 * i
			m.base[i] = -1 * i
			m.check[i] = -1
			firstFreeState = i
			lastFreeState = i
		} else {
			// Link i after the current tail and make the head point back at
			// i as the new tail.
			m.base[i] = -1 * lastFreeState
			m.check[i] = -1
			m.base[firstFreeState] = -1 * i
			m.check[lastFreeState] = -1 * i
			lastFreeState = i
		}
	}
}
297 | 
298 | // nextFreeState uses the nature of the bidirectional link to determine the
299 | // closest free state at a larger index. Since the check array holds the
300 | // negative index of the next free state, except for the last free state which
301 | // has a value of -1, negating this value is the next free state.
302 | func (m *Matcher) nextFreeState(curFreeState int) int {
303 | 	nextState := -1 * m.check[curFreeState]
304 | 
305 | 	// state 1 can never be a free state.
306 | 	if nextState == 1 {
307 | 		return -1
308 | 	}
309 | 
310 | 	return nextState
311 | }
312 | 
313 | // firstFreeState uses the first value in the check array which points to the
314 | // first free state. A value of 0 means there are no free states and -1 is
315 | // returned.
316 | func (m *Matcher) firstFreeState() int {
317 | 	state := m.check[0]
318 | 	if state != 0 {
319 | 		return -1 * state
320 | 	}
321 | 	return -1
322 | }
323 | 
324 | // lastFreeState uses the base value of the first free state which points the
325 | // last free state.
326 | func (m *Matcher) lastFreeState() int {
327 | 	firstFree := m.firstFreeState()
328 | 	if firstFree != -1 {
329 | 		return -1 * m.base[firstFree]
330 | 	}
331 | 	return -1
332 | }
333 | 
334 | // hasEdge determines if the fromState has a transition for offset.
335 | func (m *Matcher) hasEdge(fromState, offset int) bool {
336 | 	toState := m.base[fromState] + offset
337 | 	return toState > 0 && toState < len(m.check) && m.check[toState] == fromState
338 | }
339 | 
// Match describes a single occurrence of a pattern in the searched text.
type Match struct {
	Word  []byte // the bytes of the text that matched the pattern
	Index int    // the offset in the text at which the match starts
}

// String formats the match as the quoted word followed by its start index,
// wrapped in braces.
func (m *Match) String() string {
	word, index := m.Word, m.Index
	return fmt.Sprintf(`{ "%s" %d }`, word, index)
}
349 | 
350 | func (m *Matcher) findAll(text []byte) []*Match {
351 | 	var matches []*Match
352 | 	state := 0
353 | 	for i, b := range text {
354 | 		offset := int(b)
355 | 		for state != 0 && !m.hasEdge(state, offset) {
356 | 			state = m.fail[state]
357 | 		}
358 | 
359 | 		if m.hasEdge(state, offset) {
360 | 			state = m.base[state] + offset
361 | 		}
362 | 		for _, wordlen := range m.output[state] {
363 | 			matches = append(matches, &Match{text[i-wordlen+1 : i+1], i - wordlen + 1})
364 | 		}
365 | 	}
366 | 	return matches
367 | }
368 | 
// FindAllByteSlice finds all instances of the patterns in the text.
//
// It delegates to findAll. Each returned Match carries the matched bytes as a
// sub-slice of text (not a copy) together with the index in text at which the
// match starts.
func (m *Matcher) FindAllByteSlice(text []byte) (matches []*Match) {
	return m.findAll(text)
}
373 | 
// FindAllString finds all instances of the patterns in the text.
//
// text is converted to a byte slice and searched with FindAllByteSlice, so
// Match.Index is a byte offset into text rather than a rune offset.
func (m *Matcher) FindAllString(text string) []*Match {
	return m.FindAllByteSlice([]byte(text))
}
378 | 


--------------------------------------------------------------------------------
/ahocorasick_test.go:
--------------------------------------------------------------------------------
  1 | package ahocorasick
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"reflect"
  6 | 	"testing"
  7 | )
  8 | 
  9 | func convert(got []*Match) []Match {
 10 | 	var converted []Match
 11 | 	for _, matchptr := range got {
 12 | 		converted = append(converted, *matchptr)
 13 | 	}
 14 | 	return converted
 15 | }
 16 | 
 17 | func TestFindAllByteSlice(t *testing.T) {
 18 | 	m := compile([][]byte{
 19 | 		[]byte("he"),
 20 | 		[]byte("his"),
 21 | 		[]byte("hers"),
 22 | 		[]byte("she")},
 23 | 	)
 24 | 	m.findAll([]byte("ushers")) // => { "she" 1 }, { "he" 2}, { "hers" 2 }
 25 | 	tests := []struct {
 26 | 		patterns [][]byte
 27 | 		expected []Match
 28 | 		text     []byte
 29 | 	}{
 30 | 		{
 31 | 			[][]byte{[]byte("na"), []byte("ink"), []byte("ki")},
 32 | 			[]Match{{[]byte("ink"), 0}, {[]byte("ki"), 2}},
 33 | 			[]byte("inking"),
 34 | 		},
 35 | 		{
 36 | 			[][]byte{[]byte("ca"), []byte("erica"), []byte("rice")},
 37 | 			[]Match{{[]byte("ca"), 3}, {[]byte("erica"), 0}},
 38 | 			[]byte("erican"),
 39 | 		},
 40 | 		{
 41 | 			[][]byte{[]byte("he"), []byte("she"), []byte("his"), []byte("hers")},
 42 | 			[]Match{{[]byte("he"), 2}, {[]byte("she"), 1}, {[]byte("hers"), 2}},
 43 | 			[]byte("ushers"),
 44 | 		},
 45 | 		{
 46 | 			[][]byte{[]byte("they"), []byte("their"), []byte("theyre"), []byte("the"), []byte("tea"), []byte("te"), []byte("team"), []byte("go"), []byte("goo"), []byte("good"), []byte("oode")},
 47 | 			[]Match{{[]byte("the"), 0}, {[]byte("they"), 0}, {[]byte("theyre"), 0}, {[]byte("go"), 13}, {[]byte("goo"), 13}, {[]byte("good"), 13}, {[]byte("oode"), 14}, {[]byte("te"), 19}, {[]byte("tea"), 19}, {[]byte("team"), 19}},
 48 | 			[]byte("theyre not a goode team"),
 49 | 		},
 50 | 		{
 51 | 			[][]byte{[]byte("a")},
 52 | 			[]Match{{[]byte("a"), 0}, {[]byte("a"), 1}, {[]byte("a"), 2}, {[]byte("a"), 5}, {[]byte("a"), 7}, {[]byte("a"), 9}, {[]byte("a"), 11}},
 53 | 			[]byte("aaabbabababa"),
 54 | 		},
 55 | 		{
 56 | 			[][]byte{},
 57 | 			[]Match{},
 58 | 			[]byte("there is no patterns"),
 59 | 		},
 60 | 		{
 61 | 			[][]byte{[]byte("锅"), []byte("持有人"), []byte("potholderz"), []byte("MF DOOM")},
 62 | 			[]Match{{[]byte("potholderz"), 0}, {[]byte("MF DOOM"), 14}, {[]byte("锅"), 39}, {[]byte("持有人"), 43}},
 63 | 			[]byte("potholderz by MF DOOM hot shit aw shit 锅 持有人"),
 64 | 		},
 65 | 	}
 66 | 	for _, test := range tests {
 67 | 		matcher := compile(test.patterns)
 68 | 		got := matcher.findAll(test.text)
 69 | 		gotConverted := convert(got)
 70 | 		if !(len(got) == 0 && len(test.expected) == 0) &&
 71 | 			!reflect.DeepEqual(gotConverted, test.expected) {
 72 | 			t.Errorf(`
 73 |         Text:     %s
 74 | 		Expected: %v
 75 | 		Got:      %v
 76 | 		`, test.text, test.expected, gotConverted)
 77 | 		}
 78 | 	}
 79 | }
 80 | 
 81 | func TestIncreaseSize(t *testing.T) {
 82 | 	m := &Matcher{
 83 | 		[]int{5, 0, 0},
 84 | 		[]int{0, 0, 0},
 85 | 		[]int{0, 0, 0},
 86 | 		[][]int{},
 87 | 	}
 88 | 	m.increaseSize(1)
 89 | 	if !reflect.DeepEqual(m.base, []int{5, 0, 0, -3}) {
 90 | 		t.Errorf("Got: %v\n", m.base)
 91 | 	}
 92 | 	if !reflect.DeepEqual(m.check, []int{-3, 0, 0, -1}) {
 93 | 		t.Errorf("Got: %v\n", m.check)
 94 | 	}
 95 | 
 96 | 	m.increaseSize(1)
 97 | 	if !reflect.DeepEqual(m.base, []int{5, 0, 0, -4, -3}) {
 98 | 		t.Errorf("Got: %v\n", m.base)
 99 | 	}
100 | 	if !reflect.DeepEqual(m.check, []int{-3, 0, 0, -4, -1}) {
101 | 		t.Errorf("Got: %v\n", m.check)
102 | 	}
103 | 
104 | 	m.increaseSize(1)
105 | 	if !reflect.DeepEqual(m.base, []int{5, 0, 0, -5, -3, -4}) {
106 | 		t.Errorf("Got: %v\n", m.base)
107 | 	}
108 | 	if !reflect.DeepEqual(m.check, []int{-3, 0, 0, -4, -5, -1}) {
109 | 		t.Errorf("Got: %v\n", m.check)
110 | 	}
111 | 
112 | 	m = &Matcher{
113 | 		[]int{5, 0, 0},
114 | 		[]int{0, 0, 0},
115 | 		[]int{0, 0, 0},
116 | 		[][]int{},
117 | 	}
118 | 	m.increaseSize(3)
119 | 	if !reflect.DeepEqual(m.base, []int{5, 0, 0, -5, -3, -4}) {
120 | 		t.Errorf("Got: %v\n", m.base)
121 | 	}
122 | 	if !reflect.DeepEqual(m.check, []int{-3, 0, 0, -4, -5, -1}) {
123 | 		t.Errorf("Got: %v\n", m.check)
124 | 	}
125 | 
126 | 	m.increaseSize(3)
127 | 	if !reflect.DeepEqual(m.base, []int{5, 0, 0, -8, -3, -4, -5, -6, -7}) {
128 | 		t.Errorf("Got: %v\n", m.base)
129 | 	}
130 | 	if !reflect.DeepEqual(m.check, []int{-3, 0, 0, -4, -5, -6, -7, -8, -1}) {
131 | 		t.Errorf("Got: %v\n", m.check)
132 | 	}
133 | 
134 | 	m = &Matcher{
135 | 		[]int{0},
136 | 		[]int{0},
137 | 		[]int{0},
138 | 		[][]int{},
139 | 	}
140 | 	m.increaseSize(5)
141 | 	if !reflect.DeepEqual(m.base, []int{0, -5, -1, -2, -3, -4}) {
142 | 		t.Errorf("Got: %v\n", m.base)
143 | 	}
144 | 	if !reflect.DeepEqual(m.check, []int{-1, -2, -3, -4, -5, -1}) {
145 | 		t.Errorf("Got: %v\n", m.check)
146 | 	}
147 | 
148 | 	m = &Matcher{
149 | 		[]int{-103, -1867},
150 | 		[]int{0, 0},
151 | 		[]int{},
152 | 		[][]int{},
153 | 	}
154 | 	m.increaseSize(5)
155 | 	if !reflect.DeepEqual(m.base, []int{-103, -1867, -6, -2, -3, -4, -5}) {
156 | 		t.Errorf("Got: %v\n", m.base)
157 | 	}
158 | 	if !reflect.DeepEqual(m.check, []int{-2, 0, -3, -4, -5, -6, -1}) {
159 | 		t.Errorf("Got: %v\n", m.check)
160 | 	}
161 | }
162 | 
163 | func TestNextFreeState(t *testing.T) {
164 | 	m := &Matcher{
165 | 		[]int{5, 0, 0, -3},
166 | 		[]int{-3, 0, 0, -1},
167 | 		[]int{},
168 | 		[][]int{},
169 | 	}
170 | 	nextState := m.nextFreeState(3)
171 | 	if nextState != -1 {
172 | 		t.Errorf("Got: %d\n", nextState)
173 | 	}
174 | 
175 | 	m.increaseSize(3)
176 | 	nextState = m.nextFreeState(3)
177 | 	if nextState != 4 {
178 | 		t.Errorf("Got: %d\n", nextState)
179 | 	}
180 | }
181 | 
182 | func TestOccupyState(t *testing.T) {
183 | 	m := &Matcher{
184 | 		[]int{5, 0, 0, -3},
185 | 		[]int{-3, 0, 0, -1},
186 | 		[]int{},
187 | 		[][]int{},
188 | 	}
189 | 	m.increaseSize(5)
190 | 	m.occupyState(3, 1)
191 | 	m.occupyState(4, 1)
192 | 	m.occupyState(8, 1)
193 | 	m.occupyState(6, 1)
194 | 	m.occupyState(5, 1)
195 | 	m.occupyState(7, 1)
196 | 	if !reflect.DeepEqual(m.base, []int{5, 0, 0, -1867, -1867, -1867, -1867, -1867, -1867}) {
197 | 		t.Errorf("Got: %v\n", m.base)
198 | 	}
199 | 	if !reflect.DeepEqual(m.check, []int{0, 0, 0, 1, 1, 1, 1, 1, 1}) {
200 | 		t.Errorf("Got: %v\n", m.check)
201 | 	}
202 | }
203 | 
204 | func ExampleMatcher_FindAllByteSlice() {
205 | 	matcher := CompileByteSlices([][]byte{
206 | 		[]byte("he"),
207 | 		[]byte("she"),
208 | 		[]byte("his"),
209 | 		[]byte("hers"),
210 | 		[]byte("she"),
211 | 	})
212 | 	fmt.Print(matcher.FindAllByteSlice([]byte("ushers")))
213 | 
214 | 	// Output:
215 | 	// [{ "he" 2 } { "she" 1 } { "she" 1 } { "hers" 2 }]
216 | }
217 | 
218 | func ExampleMatcher_FindAllString() {
219 | 	matcher := CompileStrings([]string{
220 | 		"he",
221 | 		"she",
222 | 		"his",
223 | 		"hers",
224 | 		"she",
225 | 	})
226 | 	fmt.Print(matcher.FindAllString("ushers"))
227 | 
228 | 	// Output:
229 | 	// [{ "he" 2 } { "she" 1 } { "she" 1 } { "hers" 2 }]
230 | }
231 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/rrethy/ahocorasick
2 | 
3 | go 1.19
4 | 


--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RRethy/ahocorasick/08c7d453a072a4d8528ff42708a16758d45d7a8d/go.sum


--------------------------------------------------------------------------------