├── .github
    ├── FUNDING.yml
    ├── dependabot.yml
    └── workflows
    │   └── go.yml
├── go.mod
├── go.sum
├── fuzzy
    ├── fuzz_test.go
    ├── levenshtein.go
    ├── levenshtein_test.go
    ├── fuzzy.go
    └── fuzzy_test.go
├── LICENSE
└── README.md


/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: lithammer
2 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/lithammer/fuzzysearch
2 | 
3 | go 1.24.0
4 | 
5 | require golang.org/x/text v0.31.0
6 | 


--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
2 | golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
3 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | updates:
 3 |   - package-ecosystem: "gomod"
 4 |     directory: "/"
 5 |     schedule:
 6 |       interval: "monthly"
 7 | 
 8 |   - package-ecosystem: "github-actions"
 9 |     directory: "/"
10 |     schedule:
11 |       interval: "monthly"
12 | 


--------------------------------------------------------------------------------
/fuzzy/fuzz_test.go:
--------------------------------------------------------------------------------
 1 | package fuzzy
 2 | 
 3 | import (
 4 | 	"sort"
 5 | 	"testing"
 6 | )
 7 | 
 8 | func FuzzFind(f *testing.F) {
 9 | 	f.Fuzz(func(t *testing.T, n string, h []byte) {
10 | 		s := make([]string, len(h))
11 | 		for i, b := range h {
12 | 			s[i] = string(b)
13 | 		}
14 | 		Find(n, s)
15 | 		FindFold(n, s)
16 | 		FindNormalized(n, s)
17 | 		FindNormalizedFold(n, s)
18 | 		r := RankFind(n, s)
19 | 		sort.Sort(r)
20 | 		// No need to sort the other Rank calls;
21 | 		// assume first sort can catch any bugs.
22 | 		RankFindFold(n, s)
23 | 		RankFindNormalized(n, s)
24 | 		RankFindNormalizedFold(n, s)
25 | 		if len(s) > 0 {
26 | 			x := s[0]
27 | 			LevenshteinDistance(n, x)
28 | 			Match(n, x)
29 | 			MatchFold(n, x)
30 | 			MatchNormalized(n, x)
31 | 			MatchNormalizedFold(n, x)
32 | 		}
33 | 	})
34 | }
35 | 


--------------------------------------------------------------------------------
/.github/workflows/go.yml:
--------------------------------------------------------------------------------
 1 | name: Go
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - master
 7 |   pull_request:
 8 |     branches:
 9 |       - master
10 | 
11 | jobs:
12 |   test:
13 |     strategy:
14 |       matrix:
15 |         platform: [ubuntu-latest, macos-latest, windows-latest]
16 |         go-version: ["1.24", "1.25"]
17 |     runs-on: ${{ matrix.platform }}
18 |     steps:
19 |       - name: Setup Go
20 |         uses: actions/setup-go@v6
21 |         with:
22 |           go-version: ${{ matrix.go-version }}
23 | 
24 |       - name: Checkout
25 |         uses: actions/checkout@v6
26 | 
27 |       - name: Download Go dependencies
28 |         run: go mod download
29 |         env:
30 |           GOPROXY: "https://proxy.golang.org"
31 | 
32 |       - name: Build
33 |         run: go build -v ./...
34 | 
35 |       - name: Test
36 |         run: go test -v ./...
37 | 
38 |       - name: Lint
39 |         uses: golangci/golangci-lint-action@v9.1.0
40 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2018 Peter Lithammer
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/fuzzy/levenshtein.go:
--------------------------------------------------------------------------------
 1 | package fuzzy
 2 | 
 3 | // LevenshteinDistance measures the difference between two strings.
 4 | // The Levenshtein distance between two words is the minimum number of
 5 | // single-character edits (i.e. insertions, deletions or substitutions)
 6 | // required to change one word into the other.
 7 | //
 8 | // This implemention is optimized to use O(min(m,n)) space and is based on the
 9 | // optimized C version found here:
10 | // http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#C
11 | func LevenshteinDistance(s, t string) int {
12 | 	r1, r2 := []rune(s), []rune(t)
13 | 	column := make([]int, 1, 64)
14 | 
15 | 	for y := 1; y <= len(r1); y++ {
16 | 		column = append(column, y)
17 | 	}
18 | 
19 | 	for x := 1; x <= len(r2); x++ {
20 | 		column[0] = x
21 | 
22 | 		for y, lastDiag := 1, x-1; y <= len(r1); y++ {
23 | 			oldDiag := column[y]
24 | 			cost := 0
25 | 			if r1[y-1] != r2[x-1] {
26 | 				cost = 1
27 | 			}
28 | 			column[y] = min(column[y]+1, column[y-1]+1, lastDiag+cost)
29 | 			lastDiag = oldDiag
30 | 		}
31 | 	}
32 | 
33 | 	return column[len(r1)]
34 | }
35 | 
// min2 returns the smaller of a and b.
func min2(a, b int) int {
	if b < a {
		return b
	}
	return a
}

// min returns the smallest of a, b and c.
func min(a, b, c int) int {
	smallest := min2(a, b)
	return min2(smallest, c)
}
46 | 


--------------------------------------------------------------------------------
/fuzzy/levenshtein_test.go:
--------------------------------------------------------------------------------
 1 | package fuzzy
 2 | 
 3 | import "testing"
 4 | 
 5 | var levenshteinDistanceTests = []struct {
 6 | 	s, t   string
 7 | 	wanted int
 8 | }{
 9 | 	{"zazz", deBelloGallico + " zazz", 1544},
10 | 	{"zazz", "zazz " + deBelloGallico, 1544},
11 | 	{"a", "a", 0},
12 | 	{"ab", "ab", 0},
13 | 	{"ab", "aa", 1},
14 | 	{"ab", "aaa", 2},
15 | 	{"bbb", "a", 3},
16 | 	{"kitten", "sitting", 3},
17 | 	{"ёлка", "ёлочка", 2},
18 | 	{"ветер", "ёлочка", 6},
19 | 	{"中国", "中华人民共和国", 5},
20 | 	{"日本", "中华人民共和国", 7},
21 | }
22 | 
23 | func TestLevenshtein(t *testing.T) {
24 | 	for _, test := range levenshteinDistanceTests {
25 | 		distance := LevenshteinDistance(test.s, test.t)
26 | 		if distance != test.wanted {
27 | 			t.Errorf("got distance %d, expected %d for %s in %s",
28 | 				distance, test.wanted, test.s, test.t)
29 | 		}
30 | 	}
31 | }
32 | 
33 | func BenchmarkLevenshteinDistance(b *testing.B) {
34 | 	ldt := levenshteinDistanceTests[2]
35 | 	ldt2 := levenshteinDistanceTests[5]
36 | 	for i := 0; i < b.N; i++ {
37 | 		LevenshteinDistance(ldt.s, ldt.t)
38 | 		LevenshteinDistance(ldt2.s, ldt2.t)
39 | 	}
40 | }
41 | 
42 | func BenchmarkLevenshteinDistanceBigLate(b *testing.B) {
43 | 	ldt := levenshteinDistanceTests[0]
44 | 	for i := 0; i < b.N; i++ {
45 | 		LevenshteinDistance(ldt.s, ldt.t)
46 | 	}
47 | }
48 | 
49 | func BenchmarkLevenshteinDistanceBigEarly(b *testing.B) {
50 | 	ldt := levenshteinDistanceTests[1]
51 | 	for i := 0; i < b.N; i++ {
52 | 		LevenshteinDistance(ldt.s, ldt.t)
53 | 	}
54 | }
55 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Fuzzy Search
 2 | 
 3 | Inspired by [bevacqua/fuzzysearch][1], a fuzzy matching library written in
 4 | JavaScript, but with some extras such as ranking using [Levenshtein
 5 | distance][2] and finding matches in a list of words.
 6 | 
 7 | Fuzzy searching allows for flexibly matching a string with partial input,
 8 | useful for filtering data very quickly based on lightweight user input.
 9 | 
10 | The current implementation uses the algorithm suggested by Mr. Aleph, a Russian
11 | compiler engineer working at V8.
12 | 
13 | ## Install
14 | 
15 | ```
16 | go get github.com/lithammer/fuzzysearch/fuzzy
17 | ```
18 | 
19 | ## Usage
20 | 
21 | ```go
22 | package main
23 | 
24 | import "github.com/lithammer/fuzzysearch/fuzzy"
25 | 
26 | func main() {
27 | 	fuzzy.Match("twl", "cartwheel")  // true
28 | 	fuzzy.Match("cart", "cartwheel") // true
29 | 	fuzzy.Match("cw", "cartwheel")   // true
30 | 	fuzzy.Match("ee", "cartwheel")   // true
31 | 	fuzzy.Match("art", "cartwheel")  // true
32 | 	fuzzy.Match("eeel", "cartwheel") // false
33 | 	fuzzy.Match("dog", "cartwheel")  // false
34 | 	fuzzy.Match("kitten", "sitting") // false
35 | 	
36 | 	fuzzy.RankMatch("kitten", "sitting") // -1
37 | 	fuzzy.RankMatch("cart", "cartwheel") // 5
38 | 	
39 | 	words := []string{"cartwheel", "foobar", "wheel", "baz"}
40 | 	fuzzy.Find("whl", words) // [cartwheel wheel]
41 | 	
42 | 	fuzzy.RankFind("whl", words) // [{whl cartwheel 6 0} {whl wheel 2 2}]
43 | 	
44 | 	// Unicode normalized matching.
45 | 	fuzzy.MatchNormalized("cartwheel", "cartwhéél") // true
46 | 
47 | 	// Case insensitive matching.
48 | 	fuzzy.MatchFold("ArTeeL", "cartwheel") // true
49 | }
50 | ```
51 | 
52 | You can sort the result of a `fuzzy.RankFind()` call using the [`sort`][3]
53 | package in the standard library:
54 | 
55 | ```go
56 | matches := fuzzy.RankFind("whl", words) // [{whl cartwheel 6 0} {whl wheel 2 2}]
57 | sort.Sort(matches) // [{whl wheel 2 2} {whl cartwheel 6 0}]
58 | ```
59 | 
60 | See the [`fuzzy`][4] package documentation for more examples.
61 | 
62 | ## License
63 | 
64 | MIT
65 | 
66 | [1]: https://github.com/bevacqua/fuzzysearch
67 | [2]: https://en.wikipedia.org/wiki/Levenshtein_distance
68 | [3]: https://pkg.go.dev/sort
69 | [4]: https://pkg.go.dev/github.com/lithammer/fuzzysearch/fuzzy
70 | 


--------------------------------------------------------------------------------
/fuzzy/fuzzy.go:
--------------------------------------------------------------------------------
  1 | // Package fuzzy implements fuzzy searching, which allows for flexibly matching a string with partial input,
  2 | // useful for filtering data very quickly based on lightweight user input.
  3 | package fuzzy
  4 | 
  5 | import (
  6 | 	"unicode"
  7 | 	"unicode/utf8"
  8 | 
  9 | 	"golang.org/x/text/runes"
 10 | 	"golang.org/x/text/transform"
 11 | 	"golang.org/x/text/unicode/norm"
 12 | )
 13 | 
 14 | func noopTransformer() transform.Transformer {
 15 | 	return nopTransformer{}
 16 | }
 17 | 
 18 | func foldTransformer() transform.Transformer {
 19 | 	return unicodeFoldTransformer{}
 20 | }
 21 | 
 22 | func normalizeTransformer() transform.Transformer {
 23 | 	return transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
 24 | }
 25 | 
 26 | func normalizedFoldTransformer() transform.Transformer {
 27 | 	return transform.Chain(normalizeTransformer(), foldTransformer())
 28 | }
 29 | 
 30 | // Match returns true if source matches target using a fuzzy-searching
 31 | // algorithm. Note that it doesn't implement Levenshtein distance (see
 32 | // RankMatch instead), but rather a simplified version where there's no
 33 | // approximation. The method will return true only if each character in the
 34 | // source can be found in the target and occurs after the preceding matches.
 35 | func Match(source, target string) bool {
 36 | 	return match(source, target, noopTransformer())
 37 | }
 38 | 
 39 | // MatchFold is a case-insensitive version of Match.
 40 | func MatchFold(source, target string) bool {
 41 | 	return match(source, target, foldTransformer())
 42 | }
 43 | 
 44 | // MatchNormalized is a unicode-normalized version of Match.
 45 | func MatchNormalized(source, target string) bool {
 46 | 	return match(source, target, normalizeTransformer())
 47 | }
 48 | 
 49 | // MatchNormalizedFold is a unicode-normalized and case-insensitive version of Match.
 50 | func MatchNormalizedFold(source, target string) bool {
 51 | 	return match(source, target, normalizedFoldTransformer())
 52 | }
 53 | 
 54 | func match(source, target string, transformer transform.Transformer) bool {
 55 | 	sourceT := stringTransform(source, transformer)
 56 | 	targetT := stringTransform(target, transformer)
 57 | 	return matchTransformed(sourceT, targetT)
 58 | }
 59 | 
 60 | func matchTransformed(source, target string) bool {
 61 | 	lenDiff := len(target) - len(source)
 62 | 
 63 | 	if lenDiff < 0 {
 64 | 		return false
 65 | 	}
 66 | 
 67 | 	if lenDiff == 0 && source == target {
 68 | 		return true
 69 | 	}
 70 | 
 71 | Outer:
 72 | 	for _, r1 := range source {
 73 | 		for i, r2 := range target {
 74 | 			if r1 == r2 {
 75 | 				target = target[i+utf8.RuneLen(r2):]
 76 | 				continue Outer
 77 | 			}
 78 | 		}
 79 | 		return false
 80 | 	}
 81 | 
 82 | 	return true
 83 | }
 84 | 
 85 | // Find will return a list of strings in targets that fuzzy matches source.
 86 | func Find(source string, targets []string) []string {
 87 | 	return find(source, targets, noopTransformer())
 88 | }
 89 | 
 90 | // FindFold is a case-insensitive version of Find.
 91 | func FindFold(source string, targets []string) []string {
 92 | 	return find(source, targets, foldTransformer())
 93 | }
 94 | 
 95 | // FindNormalized is a unicode-normalized version of Find.
 96 | func FindNormalized(source string, targets []string) []string {
 97 | 	return find(source, targets, normalizeTransformer())
 98 | }
 99 | 
100 | // FindNormalizedFold is a unicode-normalized and case-insensitive version of Find.
101 | func FindNormalizedFold(source string, targets []string) []string {
102 | 	return find(source, targets, normalizedFoldTransformer())
103 | }
104 | 
105 | func find(source string, targets []string, transformer transform.Transformer) []string {
106 | 	sourceT := stringTransform(source, transformer)
107 | 
108 | 	var matches []string
109 | 
110 | 	for _, target := range targets {
111 | 		targetT := stringTransform(target, transformer)
112 | 		if matchTransformed(sourceT, targetT) {
113 | 			matches = append(matches, target)
114 | 		}
115 | 	}
116 | 
117 | 	return matches
118 | }
119 | 
120 | // RankMatch is similar to Match except it will measure the Levenshtein
121 | // distance between the source and the target and return its result. If there
122 | // was no match, it will return -1.
123 | // Given the requirements of match, RankMatch only needs to perform a subset of
124 | // the Levenshtein calculation, only deletions need be considered, required
125 | // additions and substitutions would fail the match test.
126 | func RankMatch(source, target string) int {
127 | 	return rank(source, target, noopTransformer())
128 | }
129 | 
130 | // RankMatchFold is a case-insensitive version of RankMatch.
131 | func RankMatchFold(source, target string) int {
132 | 	return rank(source, target, foldTransformer())
133 | }
134 | 
135 | // RankMatchNormalized is a unicode-normalized version of RankMatch.
136 | func RankMatchNormalized(source, target string) int {
137 | 	return rank(source, target, normalizeTransformer())
138 | }
139 | 
140 | // RankMatchNormalizedFold is a unicode-normalized and case-insensitive version of RankMatch.
141 | func RankMatchNormalizedFold(source, target string) int {
142 | 	return rank(source, target, normalizedFoldTransformer())
143 | }
144 | 
145 | func rank(source, target string, transformer transform.Transformer) int {
146 | 	lenDiff := len(target) - len(source)
147 | 
148 | 	if lenDiff < 0 {
149 | 		return -1
150 | 	}
151 | 
152 | 	source = stringTransform(source, transformer)
153 | 	target = stringTransform(target, transformer)
154 | 
155 | 	if lenDiff == 0 && source == target {
156 | 		return 0
157 | 	}
158 | 
159 | 	runeDiff := 0
160 | 
161 | Outer:
162 | 	for _, r1 := range source {
163 | 		for i, r2 := range target {
164 | 			if r1 == r2 {
165 | 				target = target[i+utf8.RuneLen(r2):]
166 | 				continue Outer
167 | 			} else {
168 | 				runeDiff++
169 | 			}
170 | 		}
171 | 		return -1
172 | 	}
173 | 
174 | 	// Count up remaining char
175 | 	runeDiff += utf8.RuneCountInString(target)
176 | 
177 | 	return runeDiff
178 | }
179 | 
180 | // RankFind is similar to Find, except it will also rank all matches using
181 | // Levenshtein distance.
182 | func RankFind(source string, targets []string) Ranks {
183 | 	return rankFind(source, targets, noopTransformer())
184 | }
185 | 
186 | // RankFindFold is a case-insensitive version of RankFind.
187 | func RankFindFold(source string, targets []string) Ranks {
188 | 	return rankFind(source, targets, foldTransformer())
189 | }
190 | 
191 | // RankFindNormalized is a unicode-normalized version of RankFind.
192 | func RankFindNormalized(source string, targets []string) Ranks {
193 | 	return rankFind(source, targets, normalizeTransformer())
194 | }
195 | 
196 | // RankFindNormalizedFold is a unicode-normalized and case-insensitive version of RankFind.
197 | func RankFindNormalizedFold(source string, targets []string) Ranks {
198 | 	return rankFind(source, targets, normalizedFoldTransformer())
199 | }
200 | 
201 | func rankFind(source string, targets []string, transformer transform.Transformer) Ranks {
202 | 	sourceT := stringTransform(source, transformer)
203 | 
204 | 	var r Ranks
205 | 
206 | 	for index, target := range targets {
207 | 		targetT := stringTransform(target, transformer)
208 | 		if matchTransformed(sourceT, targetT) {
209 | 			distance := LevenshteinDistance(source, target)
210 | 			r = append(r, Rank{source, target, distance, index})
211 | 		}
212 | 	}
213 | 	return r
214 | }
215 | 
// Rank describes one match produced by the RankFind family of functions.
type Rank struct {
	// Source is used as the source for matching.
	Source string

	// Target is the word matched against.
	Target string

	// Distance is the Levenshtein distance between Source and Target.
	Distance int

	// OriginalIndex is the position of Target in the original list.
	OriginalIndex int
}

// Ranks is a list of Rank values that can be ordered by Distance with the
// sort package.
type Ranks []Rank

// Len returns the number of elements in r.
func (r Ranks) Len() int { return len(r) }

// Swap exchanges the elements at positions i and j.
func (r Ranks) Swap(i, j int) {
	tmp := r[i]
	r[i] = r[j]
	r[j] = tmp
}

// Less reports whether the element at i has a smaller Distance than the
// element at j.
func (r Ranks) Less(i, j int) bool {
	return r[j].Distance > r[i].Distance
}
243 | 
244 | func stringTransform(s string, t transform.Transformer) (transformed string) {
245 | 	// Fast path for the nop transformer to prevent unnecessary allocations.
246 | 	if _, ok := t.(nopTransformer); ok {
247 | 		return s
248 | 	}
249 | 
250 | 	var err error
251 | 	transformed, _, err = transform.String(t, s)
252 | 	if err != nil {
253 | 		transformed = s
254 | 	}
255 | 
256 | 	return
257 | }
258 | 
259 | type unicodeFoldTransformer struct{ transform.NopResetter }
260 | 
261 | func (unicodeFoldTransformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
262 | 	// Converting src to a string allocates.
263 | 	// In theory, it need not; see https://go.dev/issue/27148.
264 | 	// It is possible to write this loop using utf8.DecodeRune
265 | 	// and thereby avoid allocations, but it is noticeably slower.
266 | 	// So just let's wait for the compiler to get smarter.
267 | 	for _, r := range string(src) {
268 | 		if r == utf8.RuneError {
269 | 			// Go spec for ranging over a string says:
270 | 			// If the iteration encounters an invalid UTF-8 sequence,
271 | 			// the second value will be 0xFFFD, the Unicode replacement character,
272 | 			// and the next iteration will advance a single byte in the string.
273 | 			nSrc++
274 | 		} else {
275 | 			nSrc += utf8.RuneLen(r)
276 | 		}
277 | 		r = unicode.ToLower(r)
278 | 		x := utf8.RuneLen(r)
279 | 		if x > len(dst[nDst:]) {
280 | 			err = transform.ErrShortDst
281 | 			break
282 | 		}
283 | 		nDst += utf8.EncodeRune(dst[nDst:], r)
284 | 	}
285 | 	return nDst, nSrc, err
286 | }
287 | 
288 | type nopTransformer struct{ transform.NopResetter }
289 | 
290 | func (nopTransformer) Transform(dst []byte, src []byte, atEOF bool) (int, int, error) {
291 | 	return 0, len(src), nil
292 | }
293 | 


--------------------------------------------------------------------------------
/fuzzy/fuzzy_test.go:
--------------------------------------------------------------------------------
  1 | package fuzzy
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"sort"
  6 | 	"strings"
  7 | 	"testing"
  8 | )
  9 | 
 10 | const deBelloGallico = `All Gaul is divided into three parts, one of which the Belgae inhabit,
 11 | the Aquitani another, those who in their own language are called Celts, in our Gauls, the third.
 12 | All these differ from each other in language, customs and laws. The river Garonne separates the
 13 | Gauls from the Aquitani; the Marne and the Seine separate them from the Belgae. Of all these,
 14 | the Belgae are the bravest, because they are furthest from the civilization and refinement of
 15 | [our] Province, and merchants least frequently resort to them, and import those things which tend
 16 | to effeminate the mind; and they are the nearest to the Germans, who dwell beyond the Rhine,
 17 | with whom they are continually waging war; for which reason the Helvetii also surpass the rest
 18 | of the Gauls in valor, as they contend with the Germans in almost daily battles, when they either
 19 | repel them from their own territories, or themselves wage war on their frontiers. One part of
 20 | these, which it has been said that the Gauls occupy, takes its beginning at the river Rhone;
 21 | it is bounded by the river Garonne, the ocean, and the territories of the Belgae; it borders,
 22 | too, on the side of the Sequani and the Helvetii, upon the river Rhine, and stretches toward the
 23 | north. The Belgae rises from the extreme frontier of Gaul, extend to the lower part of the river
 24 | Rhine; and look toward the north and the rising sun. Aquitania extends from the river Garonne to
 25 | the Pyrenaean mountains and to that part of the ocean which is near Spain: it looks between the
 26 | setting of the sun, and the north star.`
 27 | 
 28 | type fuzzyTest struct {
 29 | 	source string
 30 | 	target string
 31 | 	wanted bool
 32 | 	rank   int
 33 | }
 34 | 
 35 | var fuzzyTests = []fuzzyTest{
 36 | 	{"zazz", deBelloGallico + " zazz", true, 1544},
 37 | 	{"zazz", "zazz " + deBelloGallico, true, 1544},
 38 | 	{"twl", "cartwheel", true, 6},
 39 | 	{"cart", "cartwheel", true, 5},
 40 | 	{"cw", "cartwheel", true, 7},
 41 | 	{"ee", "cartwheel", true, 7},
 42 | 	{"art", "cartwheel", true, 6},
 43 | 	{"eeel", "cartwheel", false, -1},
 44 | 	{"dog", "cartwheel", false, -1},
 45 | 	{"ёлка", "ёлочка", true, 2},
 46 | 	{"ветер", "ёлочка", false, -1},
 47 | 	{"中国", "中华人民共和国", true, 5},
 48 | 	{"日本", "中华人民共和国", false, -1},
 49 | 	{"イ", "イカ", true, 1},
 50 | 	{"limón", "limon", false, -1},
 51 | 	{"kitten", "setting", false, -1},
 52 | 	{"\xffinvalid UTF-8\xff", "", false, -1}, // invalid UTF-8
 53 | 	{"Ⱦ", "", false, -1},                     // uppercase and lowercase runes have different UTF-8 encoding lengths
 54 | }
 55 | 
 56 | func TestFuzzyMatch(t *testing.T) {
 57 | 	for _, val := range fuzzyTests {
 58 | 		match := Match(val.source, val.target)
 59 | 		if match != val.wanted {
 60 | 			t.Errorf("%s in %s expected match to be %t, got %t",
 61 | 				val.source, val.target, val.wanted, match)
 62 | 		}
 63 | 	}
 64 | }
 65 | 
 66 | func TestFuzzyMatchFold(t *testing.T) {
 67 | 	for _, val := range fuzzyTests {
 68 | 		match := MatchFold(val.source, strings.ToUpper(val.target))
 69 | 		if match != val.wanted {
 70 | 			t.Errorf("%s in %s expected match to be %t, got %t",
 71 | 				val.source, strings.ToUpper(val.target), val.wanted, match)
 72 | 		}
 73 | 	}
 74 | }
 75 | 
 76 | func TestFuzzyMatchNormalized(t *testing.T) {
 77 | 	var normalizedTests = []struct {
 78 | 		source string
 79 | 		target string
 80 | 		wanted bool
 81 | 	}{
 82 | 		{"limon", "limón", true},
 83 | 		{"limón", "limon tart", true},
 84 | 		{"limón", "LiMóN tArT", false},
 85 | 		{"limón", "LeMoN tArT", false},
 86 | 	}
 87 | 
 88 | 	for _, val := range normalizedTests {
 89 | 		match := MatchNormalized(val.source, val.target)
 90 | 		if match != val.wanted {
 91 | 			t.Errorf("%s in %s expected match to be %t, got %t",
 92 | 				val.source, val.target, val.wanted, match)
 93 | 		}
 94 | 	}
 95 | }
 96 | 
 97 | func TestFuzzyMatchNormalizedFold(t *testing.T) {
 98 | 	var normalizedTests = []struct {
 99 | 		source string
100 | 		target string
101 | 		wanted bool
102 | 	}{
103 | 		{"limon", "limón", true},
104 | 		{"limón", "limon tart", true},
105 | 		{"limón", "LiMóN tArT", true},
106 | 		{"limón", "LeMoN tArT", false},
107 | 	}
108 | 
109 | 	for _, val := range normalizedTests {
110 | 		match := MatchNormalizedFold(val.source, val.target)
111 | 		if match != val.wanted {
112 | 			t.Errorf("%s in %s expected match to be %t, got %t",
113 | 				val.source, val.target, val.wanted, match)
114 | 		}
115 | 	}
116 | }
117 | 
118 | func TestFuzzyFind(t *testing.T) {
119 | 	target := []string{"cartwheel", "foobar", "wheel", "baz", "cartwhéél"}
120 | 	wanted := []string{"cartwheel", "wheel"}
121 | 
122 | 	matches := Find("whel", target)
123 | 
124 | 	if len(matches) != len(wanted) {
125 | 		t.Errorf("expected %s, got %s", wanted, matches)
126 | 	}
127 | 
128 | 	for i := range wanted {
129 | 		if wanted[i] != matches[i] {
130 | 			t.Errorf("expected %s, got %s", wanted, matches)
131 | 		}
132 | 	}
133 | }
134 | 
135 | func TestFuzzyFindNormalized(t *testing.T) {
136 | 	target := []string{"cartwheel", "foobar", "wheel", "baz", "cartwhéél", "WHEEL"}
137 | 	wanted := []string{"cartwheel", "wheel", "cartwhéél"}
138 | 
139 | 	matches := FindNormalized("whél", target)
140 | 
141 | 	if len(matches) != len(wanted) {
142 | 		t.Errorf("expected %s, got %s", wanted, matches)
143 | 	}
144 | 
145 | 	for i := range wanted {
146 | 		if wanted[i] != matches[i] {
147 | 			t.Errorf("expected %s, got %s", wanted, matches)
148 | 		}
149 | 	}
150 | }
151 | 
152 | func TestFuzzyFindNormalizedFold(t *testing.T) {
153 | 	target := []string{"cartwheel", "foobar", "wheel", "baz", "cartwhéél", "WHEEL"}
154 | 	wanted := []string{"cartwheel", "wheel", "cartwhéél", "WHEEL"}
155 | 
156 | 	matches := FindNormalizedFold("whél", target)
157 | 
158 | 	if len(matches) != len(wanted) {
159 | 		t.Errorf("expected %s, got %s", wanted, matches)
160 | 	}
161 | 
162 | 	for i := range wanted {
163 | 		if wanted[i] != matches[i] {
164 | 			t.Errorf("expected %s, got %s", wanted, matches)
165 | 		}
166 | 	}
167 | }
168 | 
169 | func TestRankMatch(t *testing.T) {
170 | 	for _, val := range fuzzyTests {
171 | 		rank := RankMatch(val.source, val.target)
172 | 		if rank != val.rank {
173 | 			t.Errorf("expected ranking %d, got %d for %s in %s",
174 | 				val.rank, rank, val.source, val.target)
175 | 		}
176 | 	}
177 | }
178 | 
179 | func TestRankMatchNormalized(t *testing.T) {
180 | 	var fuzzyTests = []struct {
181 | 		source string
182 | 		target string
183 | 		rank   int
184 | 	}{
185 | 		{"limó", "limon", 1},
186 | 		{"limó", "limon", 1},
187 | 		{"limó", "LIMON", -1},
188 | 	}
189 | 
190 | 	for _, val := range fuzzyTests {
191 | 		rank := RankMatchNormalized(val.source, val.target)
192 | 		if rank != val.rank {
193 | 			t.Errorf("expected ranking %d, got %d for %s in %s",
194 | 				val.rank, rank, val.source, val.target)
195 | 		}
196 | 	}
197 | }
198 | 
199 | func TestRankMatchNormalizedFold(t *testing.T) {
200 | 	var fuzzyTests = []struct {
201 | 		source string
202 | 		target string
203 | 		rank   int
204 | 	}{
205 | 		{"limó", "limon", 1},
206 | 		{"limó", "limon", 1},
207 | 		{"limó", "LIMON", 1},
208 | 		{"limó", "LIMON TART", 6},
209 | 	}
210 | 
211 | 	for _, val := range fuzzyTests {
212 | 		rank := RankMatchNormalizedFold(val.source, val.target)
213 | 		if rank != val.rank {
214 | 			t.Errorf("expected ranking %d, got %d for %s in %s",
215 | 				val.rank, rank, val.source, val.target)
216 | 		}
217 | 	}
218 | }
219 | 
220 | func TestRankMatchNormalizedFoldConcurrent(t *testing.T) {
221 | 	target := strings.Split("Lorem ipsum dolor sit amet, consectetur adipiscing elit", " ")
222 | 	source := "ips"
223 | 	procs := 10
224 | 	iter := 10
225 | 	type empty struct{}
226 | 	done := make(chan empty)
227 | 	for i := 0; i <= procs; i++ {
228 | 		go func() {
229 | 			for n := 0; n < iter; n++ {
230 | 				_ = RankFindNormalizedFold(source, target)
231 | 			}
232 | 			done <- empty{}
233 | 		}()
234 | 	}
235 | 	cnt := 0
236 | 	for i := 0; i < procs; i++ {
237 | 		<-done
238 | 		cnt++
239 | 	}
240 | }
241 | 
242 | func TestRankFind(t *testing.T) {
243 | 	target := []string{"cartwheel", "foobar", "wheel", "baz"}
244 | 	wanted := []Rank{
245 | 		{"whl", "cartwheel", 6, 0},
246 | 		{"whl", "wheel", 2, 2},
247 | 	}
248 | 
249 | 	ranks := RankFind("whl", target)
250 | 
251 | 	if len(ranks) != len(wanted) {
252 | 		t.Errorf("expected %+v, got %+v", wanted, ranks)
253 | 	}
254 | 
255 | 	for i := range wanted {
256 | 		if wanted[i] != ranks[i] {
257 | 			t.Errorf("expected %+v, got %+v", wanted, ranks)
258 | 		}
259 | 	}
260 | }
261 | 
262 | func TestRankFindNormalized(t *testing.T) {
263 | 	target := []string{"limón", "limon", "lemon", "LIMON"}
264 | 	wanted := []Rank{
265 | 		{"limó", "limón", 1, 0},
266 | 		{"limó", "limon", 2, 1},
267 | 	}
268 | 
269 | 	ranks := RankFindNormalized("limó", target)
270 | 
271 | 	if len(ranks) != len(wanted) {
272 | 		t.Errorf("expected %+v, got %+v", wanted, ranks)
273 | 	}
274 | 
275 | 	for i := range wanted {
276 | 		if wanted[i] != ranks[i] {
277 | 			t.Errorf("expected %+v, got %+v", wanted, ranks)
278 | 		}
279 | 	}
280 | }
281 | 
282 | func TestRankFindNormalizedFold(t *testing.T) {
283 | 	target := []string{"limón", "limon", "lemon", "LIMON"}
284 | 	wanted := []Rank{
285 | 		{"limó", "limón", 1, 0},
286 | 		{"limó", "limon", 2, 1},
287 | 		{"limó", "LIMON", 5, 3},
288 | 	}
289 | 
290 | 	ranks := RankFindNormalizedFold("limó", target)
291 | 
292 | 	if len(ranks) != len(wanted) {
293 | 		t.Errorf("expected %+v, got %+v", wanted, ranks)
294 | 	}
295 | 
296 | 	for i := range wanted {
297 | 		if wanted[i] != ranks[i] {
298 | 			t.Errorf("expected %+v, got %+v", wanted, ranks)
299 | 		}
300 | 	}
301 | }
302 | 
303 | func TestSortingRanks(t *testing.T) {
304 | 	rs := Ranks{{"a", "b", 1, 0}, {"a", "cc", 2, 1}, {"a", "a", 0, 2}}
305 | 	wanted := Ranks{rs[2], rs[0], rs[1]}
306 | 
307 | 	sort.Sort(rs)
308 | 
309 | 	for i := range wanted {
310 | 		if wanted[i] != rs[i] {
311 | 			t.Errorf("expected %+v, got %+v", wanted, rs)
312 | 		}
313 | 	}
314 | }
315 | 
316 | func BenchmarkMatch(b *testing.B) {
317 | 	ft := fuzzyTests[2]
318 | 	for i := 0; i < b.N; i++ {
319 | 		Match(ft.source, ft.target)
320 | 	}
321 | }
322 | 
323 | func BenchmarkMatchBigLate(b *testing.B) {
324 | 	ft := fuzzyTests[0]
325 | 	for i := 0; i < b.N; i++ {
326 | 		Match(ft.source, ft.target)
327 | 	}
328 | }
329 | 
330 | func BenchmarkMatchBigEarly(b *testing.B) {
331 | 	ft := fuzzyTests[1]
332 | 	for i := 0; i < b.N; i++ {
333 | 		Match(ft.source, ft.target)
334 | 	}
335 | }
336 | 
337 | func BenchmarkMatchFold(b *testing.B) {
338 | 	ft := fuzzyTests[2]
339 | 	for i := 0; i < b.N; i++ {
340 | 		MatchFold(ft.source, ft.target)
341 | 	}
342 | }
343 | 
344 | func BenchmarkMatchFoldBigLate(b *testing.B) {
345 | 	ft := fuzzyTests[0]
346 | 	for i := 0; i < b.N; i++ {
347 | 		MatchFold(ft.source, ft.target)
348 | 	}
349 | }
350 | 
351 | func BenchmarkMatchFoldBigEarly(b *testing.B) {
352 | 	ft := fuzzyTests[1]
353 | 	for i := 0; i < b.N; i++ {
354 | 		MatchFold(ft.source, ft.target)
355 | 	}
356 | }
357 | 
358 | func BenchmarkFindFold(b *testing.B) {
359 | 	b.Run("Plain", func(b *testing.B) { benchmarkFindFold(b, fuzzyTests[2]) })
360 | 	b.Run("BigLate", func(b *testing.B) { benchmarkFindFold(b, fuzzyTests[0]) })
361 | 	b.Run("BigEarly", func(b *testing.B) { benchmarkFindFold(b, fuzzyTests[1]) })
362 | }
363 | 
364 | func benchmarkFindFold(b *testing.B, ft fuzzyTest) {
365 | 	src := ft.source
366 | 	var tgts []string
367 | 	for i := 0; i < 128; i++ {
368 | 		tgts = append(tgts, ft.target)
369 | 	}
370 | 	b.ResetTimer()
371 | 	for i := 0; i < b.N; i++ {
372 | 		FindFold(src, tgts)
373 | 	}
374 | }
375 | 
376 | func BenchmarkRankMatch(b *testing.B) {
377 | 	ft := fuzzyTests[2]
378 | 	for i := 0; i < b.N; i++ {
379 | 		RankMatch(ft.source, ft.target)
380 | 	}
381 | }
382 | 
383 | func BenchmarkRankMatchBigLate(b *testing.B) {
384 | 	ft := fuzzyTests[0]
385 | 	for i := 0; i < b.N; i++ {
386 | 		RankMatch(ft.source, ft.target)
387 | 	}
388 | }
389 | 
390 | func BenchmarkRankMatchBigEarly(b *testing.B) {
391 | 	ft := fuzzyTests[1]
392 | 	for i := 0; i < b.N; i++ {
393 | 		RankMatch(ft.source, ft.target)
394 | 	}
395 | }
396 | 
397 | func ExampleMatch() {
398 | 	fmt.Print(Match("twl", "cartwheel"))
399 | 	// Output: true
400 | }
401 | 
402 | func ExampleFind() {
403 | 	fmt.Print(Find("whl", []string{"cartwheel", "foobar", "wheel", "baz"}))
404 | 	// Output: [cartwheel wheel]
405 | }
406 | 
407 | func ExampleRankMatch() {
408 | 	fmt.Print(RankMatch("twl", "cartwheel"))
409 | 	// Output: 6
410 | }
411 | 
412 | func ExampleRankFind() {
413 | 	fmt.Printf("%+v", RankFind("whl", []string{"cartwheel", "foobar", "wheel", "baz"}))
414 | 	// Output: [{Source:whl Target:cartwheel Distance:6 OriginalIndex:0} {Source:whl Target:wheel Distance:2 OriginalIndex:2}]
415 | }
416 | 


--------------------------------------------------------------------------------