├── .github ├── FUNDING.yml ├── dependabot.yml └── workflows │ └── go.yml ├── go.mod ├── go.sum ├── fuzzy ├── fuzz_test.go ├── levenshtein.go ├── levenshtein_test.go ├── fuzzy.go └── fuzzy_test.go ├── LICENSE └── README.md /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: lithammer 2 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/lithammer/fuzzysearch 2 | 3 | go 1.24.0 4 | 5 | require golang.org/x/text v0.31.0 6 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= 2 | golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "gomod" 4 | directory: "/" 5 | schedule: 6 | interval: "monthly" 7 | 8 | - package-ecosystem: "github-actions" 9 | directory: "/" 10 | schedule: 11 | interval: "monthly" 12 | -------------------------------------------------------------------------------- /fuzzy/fuzz_test.go: -------------------------------------------------------------------------------- 1 | package fuzzy 2 | 3 | import ( 4 | "sort" 5 | "testing" 6 | ) 7 | 8 | func FuzzFind(f *testing.F) { 9 | f.Fuzz(func(t *testing.T, n string, h []byte) { 10 | s := make([]string, len(h)) 11 | for i, b := range h { 12 | s[i] = string(b) 13 | } 14 | Find(n, s) 15 | FindFold(n, s) 16 | FindNormalized(n, s) 17 | FindNormalizedFold(n, s) 18 | r := RankFind(n, s) 19 | sort.Sort(r) 20 | // No need to sort the other Rank calls; 21 | // assume first sort can catch any bugs. 22 | RankFindFold(n, s) 23 | RankFindNormalized(n, s) 24 | RankFindNormalizedFold(n, s) 25 | if len(s) > 0 { 26 | x := s[0] 27 | LevenshteinDistance(n, x) 28 | Match(n, x) 29 | MatchFold(n, x) 30 | MatchNormalized(n, x) 31 | MatchNormalizedFold(n, x) 32 | } 33 | }) 34 | } 35 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | test: 13 | strategy: 14 | matrix: 15 | platform: [ubuntu-latest, macos-latest, windows-latest] 16 | go-version: ["1.24", "1.25"] 17 | runs-on: ${{ matrix.platform }} 18 | steps: 19 | - name: Setup Go 20 | uses: actions/setup-go@v6 21 | with: 22 | go-version: ${{ matrix.go-version }} 23 | 24 | - name: Checkout 25 | uses: actions/checkout@v6 26 | 27 | - name: Download Go dependencies 28 | run: go mod download 29 | env: 30 | GOPROXY: "https://proxy.golang.org" 31 | 32 | - name: Build 33 | run: go build -v ./... 34 | 35 | - name: Test 36 | run: go test -v ./... 37 | 38 | - name: Lint 39 | uses: golangci/golangci-lint-action@v9.1.0 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2018 Peter Lithammer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /fuzzy/levenshtein.go: -------------------------------------------------------------------------------- 1 | package fuzzy 2 | 3 | // LevenshteinDistance measures the difference between two strings. 4 | // The Levenshtein distance between two words is the minimum number of 5 | // single-character edits (i.e. insertions, deletions or substitutions) 6 | // required to change one word into the other. 7 | // 8 | // This implemention is optimized to use O(min(m,n)) space and is based on the 9 | // optimized C version found here: 10 | // http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#C 11 | func LevenshteinDistance(s, t string) int { 12 | r1, r2 := []rune(s), []rune(t) 13 | column := make([]int, 1, 64) 14 | 15 | for y := 1; y <= len(r1); y++ { 16 | column = append(column, y) 17 | } 18 | 19 | for x := 1; x <= len(r2); x++ { 20 | column[0] = x 21 | 22 | for y, lastDiag := 1, x-1; y <= len(r1); y++ { 23 | oldDiag := column[y] 24 | cost := 0 25 | if r1[y-1] != r2[x-1] { 26 | cost = 1 27 | } 28 | column[y] = min(column[y]+1, column[y-1]+1, lastDiag+cost) 29 | lastDiag = oldDiag 30 | } 31 | } 32 | 33 | return column[len(r1)] 34 | } 35 | 36 | func min2(a, b int) int { 37 | if a < b { 38 | return a 39 | } 40 | return b 41 | } 42 | 43 | func min(a, b, c int) int { 44 | return min2(min2(a, b), c) 45 | } 46 | -------------------------------------------------------------------------------- /fuzzy/levenshtein_test.go: -------------------------------------------------------------------------------- 1 | package fuzzy 2 | 3 | import "testing" 4 | 5 | var levenshteinDistanceTests = []struct { 6 | s, t string 7 | wanted int 8 | }{ 9 | {"zazz", deBelloGallico + " zazz", 1544}, 10 | {"zazz", "zazz " + deBelloGallico, 1544}, 11 | {"a", "a", 0}, 12 | {"ab", "ab", 0}, 13 | {"ab", "aa", 1}, 14 | {"ab", "aaa", 2}, 15 | {"bbb", "a", 3}, 16 | {"kitten", "sitting", 3}, 17 | {"ёлка", "ёлочка", 2}, 18 | {"ветер", "ёлочка", 6}, 19 | {"中国", "中华人民共和国", 5}, 20 | {"日本", "中华人民共和国", 7}, 21 | } 22 | 23 | func TestLevenshtein(t *testing.T) { 24 | for _, test := range levenshteinDistanceTests { 25 | distance := LevenshteinDistance(test.s, test.t) 26 | if distance != test.wanted { 27 | t.Errorf("got distance %d, expected %d for %s in %s", 28 | distance, test.wanted, test.s, test.t) 29 | } 30 | } 31 | } 32 | 33 | func BenchmarkLevenshteinDistance(b *testing.B) { 34 | ldt := levenshteinDistanceTests[2] 35 | ldt2 := levenshteinDistanceTests[5] 36 | for i := 0; i < b.N; i++ { 37 | LevenshteinDistance(ldt.s, ldt.t) 38 | LevenshteinDistance(ldt2.s, ldt2.t) 39 | } 40 | } 41 | 42 | func BenchmarkLevenshteinDistanceBigLate(b *testing.B) { 43 | ldt := levenshteinDistanceTests[0] 44 | for i := 0; i < b.N; i++ { 45 | LevenshteinDistance(ldt.s, ldt.t) 46 | } 47 | } 48 | 49 | func BenchmarkLevenshteinDistanceBigEarly(b *testing.B) { 50 | ldt := levenshteinDistanceTests[1] 51 | for i := 0; i < b.N; i++ { 52 | LevenshteinDistance(ldt.s, ldt.t) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Fuzzy Search 2 | 3 | Inspired by [bevacqua/fuzzysearch][1], a fuzzy matching library written in 4 | JavaScript. But contains some extras like ranking using [Levenshtein 5 | distance][2] and finding matches in a list of words. 6 | 7 | Fuzzy searching allows for flexibly matching a string with partial input, 8 | useful for filtering data very quickly based on lightweight user input. 9 | 10 | The current implementation uses the algorithm suggested by Mr. Aleph, a russian 11 | compiler engineer working at V8. 12 | 13 | ## Install 14 | 15 | ``` 16 | go get github.com/lithammer/fuzzysearch/fuzzy 17 | ``` 18 | 19 | ## Usage 20 | 21 | ```go 22 | package main 23 | 24 | import "github.com/lithammer/fuzzysearch/fuzzy" 25 | 26 | func main() { 27 | fuzzy.Match("twl", "cartwheel") // true 28 | fuzzy.Match("cart", "cartwheel") // true 29 | fuzzy.Match("cw", "cartwheel") // true 30 | fuzzy.Match("ee", "cartwheel") // true 31 | fuzzy.Match("art", "cartwheel") // true 32 | fuzzy.Match("eeel", "cartwheel") // false 33 | fuzzy.Match("dog", "cartwheel") // false 34 | fuzzy.Match("kitten", "sitting") // false 35 | 36 | fuzzy.RankMatch("kitten", "sitting") // -1 37 | fuzzy.RankMatch("cart", "cartwheel") // 5 38 | 39 | words := []string{"cartwheel", "foobar", "wheel", "baz"} 40 | fuzzy.Find("whl", words) // [cartwheel wheel] 41 | 42 | fuzzy.RankFind("whl", words) // [{whl cartwheel 6 0} {whl wheel 2 2}] 43 | 44 | // Unicode normalized matching. 45 | fuzzy.MatchNormalized("cartwheel", "cartwhéél") // true 46 | 47 | // Case insensitive matching. 48 | fuzzy.MatchFold("ArTeeL", "cartwheel") // true 49 | } 50 | ``` 51 | 52 | You can sort the result of a `fuzzy.RankFind()` call using the [`sort`][3] 53 | package in the standard library: 54 | 55 | ```go 56 | matches := fuzzy.RankFind("whl", words) // [{whl cartwheel 6 0} {whl wheel 2 2}] 57 | sort.Sort(matches) // [{whl wheel 2 2} {whl cartwheel 6 0}] 58 | ``` 59 | 60 | See the [`fuzzy`][4] package documentation for more examples. 61 | 62 | ## License 63 | 64 | MIT 65 | 66 | [1]: https://github.com/bevacqua/fuzzysearch 67 | [2]: http://en.wikipedia.org/wiki/Levenshtein_distance 68 | [3]: https://golang.org/pkg/sort/ 69 | [4]: https://pkg.go.dev/github.com/lithammer/fuzzysearch/fuzzy 70 | -------------------------------------------------------------------------------- /fuzzy/fuzzy.go: -------------------------------------------------------------------------------- 1 | // Fuzzy searching allows for flexibly matching a string with partial input, 2 | // useful for filtering data very quickly based on lightweight user input. 3 | package fuzzy 4 | 5 | import ( 6 | "unicode" 7 | "unicode/utf8" 8 | 9 | "golang.org/x/text/runes" 10 | "golang.org/x/text/transform" 11 | "golang.org/x/text/unicode/norm" 12 | ) 13 | 14 | func noopTransformer() transform.Transformer { 15 | return nopTransformer{} 16 | } 17 | 18 | func foldTransformer() transform.Transformer { 19 | return unicodeFoldTransformer{} 20 | } 21 | 22 | func normalizeTransformer() transform.Transformer { 23 | return transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC) 24 | } 25 | 26 | func normalizedFoldTransformer() transform.Transformer { 27 | return transform.Chain(normalizeTransformer(), foldTransformer()) 28 | } 29 | 30 | // Match returns true if source matches target using a fuzzy-searching 31 | // algorithm. Note that it doesn't implement Levenshtein distance (see 32 | // RankMatch instead), but rather a simplified version where there's no 33 | // approximation. The method will return true only if each character in the 34 | // source can be found in the target and occurs after the preceding matches. 35 | func Match(source, target string) bool { 36 | return match(source, target, noopTransformer()) 37 | } 38 | 39 | // MatchFold is a case-insensitive version of Match. 40 | func MatchFold(source, target string) bool { 41 | return match(source, target, foldTransformer()) 42 | } 43 | 44 | // MatchNormalized is a unicode-normalized version of Match. 45 | func MatchNormalized(source, target string) bool { 46 | return match(source, target, normalizeTransformer()) 47 | } 48 | 49 | // MatchNormalizedFold is a unicode-normalized and case-insensitive version of Match. 50 | func MatchNormalizedFold(source, target string) bool { 51 | return match(source, target, normalizedFoldTransformer()) 52 | } 53 | 54 | func match(source, target string, transformer transform.Transformer) bool { 55 | sourceT := stringTransform(source, transformer) 56 | targetT := stringTransform(target, transformer) 57 | return matchTransformed(sourceT, targetT) 58 | } 59 | 60 | func matchTransformed(source, target string) bool { 61 | lenDiff := len(target) - len(source) 62 | 63 | if lenDiff < 0 { 64 | return false 65 | } 66 | 67 | if lenDiff == 0 && source == target { 68 | return true 69 | } 70 | 71 | Outer: 72 | for _, r1 := range source { 73 | for i, r2 := range target { 74 | if r1 == r2 { 75 | target = target[i+utf8.RuneLen(r2):] 76 | continue Outer 77 | } 78 | } 79 | return false 80 | } 81 | 82 | return true 83 | } 84 | 85 | // Find will return a list of strings in targets that fuzzy matches source. 86 | func Find(source string, targets []string) []string { 87 | return find(source, targets, noopTransformer()) 88 | } 89 | 90 | // FindFold is a case-insensitive version of Find. 91 | func FindFold(source string, targets []string) []string { 92 | return find(source, targets, foldTransformer()) 93 | } 94 | 95 | // FindNormalized is a unicode-normalized version of Find. 96 | func FindNormalized(source string, targets []string) []string { 97 | return find(source, targets, normalizeTransformer()) 98 | } 99 | 100 | // FindNormalizedFold is a unicode-normalized and case-insensitive version of Find. 101 | func FindNormalizedFold(source string, targets []string) []string { 102 | return find(source, targets, normalizedFoldTransformer()) 103 | } 104 | 105 | func find(source string, targets []string, transformer transform.Transformer) []string { 106 | sourceT := stringTransform(source, transformer) 107 | 108 | var matches []string 109 | 110 | for _, target := range targets { 111 | targetT := stringTransform(target, transformer) 112 | if matchTransformed(sourceT, targetT) { 113 | matches = append(matches, target) 114 | } 115 | } 116 | 117 | return matches 118 | } 119 | 120 | // RankMatch is similar to Match except it will measure the Levenshtein 121 | // distance between the source and the target and return its result. If there 122 | // was no match, it will return -1. 123 | // Given the requirements of match, RankMatch only needs to perform a subset of 124 | // the Levenshtein calculation, only deletions need be considered, required 125 | // additions and substitutions would fail the match test. 126 | func RankMatch(source, target string) int { 127 | return rank(source, target, noopTransformer()) 128 | } 129 | 130 | // RankMatchFold is a case-insensitive version of RankMatch. 131 | func RankMatchFold(source, target string) int { 132 | return rank(source, target, foldTransformer()) 133 | } 134 | 135 | // RankMatchNormalized is a unicode-normalized version of RankMatch. 136 | func RankMatchNormalized(source, target string) int { 137 | return rank(source, target, normalizeTransformer()) 138 | } 139 | 140 | // RankMatchNormalizedFold is a unicode-normalized and case-insensitive version of RankMatch. 141 | func RankMatchNormalizedFold(source, target string) int { 142 | return rank(source, target, normalizedFoldTransformer()) 143 | } 144 | 145 | func rank(source, target string, transformer transform.Transformer) int { 146 | lenDiff := len(target) - len(source) 147 | 148 | if lenDiff < 0 { 149 | return -1 150 | } 151 | 152 | source = stringTransform(source, transformer) 153 | target = stringTransform(target, transformer) 154 | 155 | if lenDiff == 0 && source == target { 156 | return 0 157 | } 158 | 159 | runeDiff := 0 160 | 161 | Outer: 162 | for _, r1 := range source { 163 | for i, r2 := range target { 164 | if r1 == r2 { 165 | target = target[i+utf8.RuneLen(r2):] 166 | continue Outer 167 | } else { 168 | runeDiff++ 169 | } 170 | } 171 | return -1 172 | } 173 | 174 | // Count up remaining char 175 | runeDiff += utf8.RuneCountInString(target) 176 | 177 | return runeDiff 178 | } 179 | 180 | // RankFind is similar to Find, except it will also rank all matches using 181 | // Levenshtein distance. 182 | func RankFind(source string, targets []string) Ranks { 183 | return rankFind(source, targets, noopTransformer()) 184 | } 185 | 186 | // RankFindFold is a case-insensitive version of RankFind. 187 | func RankFindFold(source string, targets []string) Ranks { 188 | return rankFind(source, targets, foldTransformer()) 189 | } 190 | 191 | // RankFindNormalized is a unicode-normalized version of RankFind. 192 | func RankFindNormalized(source string, targets []string) Ranks { 193 | return rankFind(source, targets, normalizeTransformer()) 194 | } 195 | 196 | // RankFindNormalizedFold is a unicode-normalized and case-insensitive version of RankFind. 197 | func RankFindNormalizedFold(source string, targets []string) Ranks { 198 | return rankFind(source, targets, normalizedFoldTransformer()) 199 | } 200 | 201 | func rankFind(source string, targets []string, transformer transform.Transformer) Ranks { 202 | sourceT := stringTransform(source, transformer) 203 | 204 | var r Ranks 205 | 206 | for index, target := range targets { 207 | targetT := stringTransform(target, transformer) 208 | if matchTransformed(sourceT, targetT) { 209 | distance := LevenshteinDistance(source, target) 210 | r = append(r, Rank{source, target, distance, index}) 211 | } 212 | } 213 | return r 214 | } 215 | 216 | type Rank struct { 217 | // Source is used as the source for matching. 218 | Source string 219 | 220 | // Target is the word matched against. 221 | Target string 222 | 223 | // Distance is the Levenshtein distance between Source and Target. 224 | Distance int 225 | 226 | // Location of Target in original list 227 | OriginalIndex int 228 | } 229 | 230 | type Ranks []Rank 231 | 232 | func (r Ranks) Len() int { 233 | return len(r) 234 | } 235 | 236 | func (r Ranks) Swap(i, j int) { 237 | r[i], r[j] = r[j], r[i] 238 | } 239 | 240 | func (r Ranks) Less(i, j int) bool { 241 | return r[i].Distance < r[j].Distance 242 | } 243 | 244 | func stringTransform(s string, t transform.Transformer) (transformed string) { 245 | // Fast path for the nop transformer to prevent unnecessary allocations. 246 | if _, ok := t.(nopTransformer); ok { 247 | return s 248 | } 249 | 250 | var err error 251 | transformed, _, err = transform.String(t, s) 252 | if err != nil { 253 | transformed = s 254 | } 255 | 256 | return 257 | } 258 | 259 | type unicodeFoldTransformer struct{ transform.NopResetter } 260 | 261 | func (unicodeFoldTransformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 262 | // Converting src to a string allocates. 263 | // In theory, it need not; see https://go.dev/issue/27148. 264 | // It is possible to write this loop using utf8.DecodeRune 265 | // and thereby avoid allocations, but it is noticeably slower. 266 | // So just let's wait for the compiler to get smarter. 267 | for _, r := range string(src) { 268 | if r == utf8.RuneError { 269 | // Go spec for ranging over a string says: 270 | // If the iteration encounters an invalid UTF-8 sequence, 271 | // the second value will be 0xFFFD, the Unicode replacement character, 272 | // and the next iteration will advance a single byte in the string. 273 | nSrc++ 274 | } else { 275 | nSrc += utf8.RuneLen(r) 276 | } 277 | r = unicode.ToLower(r) 278 | x := utf8.RuneLen(r) 279 | if x > len(dst[nDst:]) { 280 | err = transform.ErrShortDst 281 | break 282 | } 283 | nDst += utf8.EncodeRune(dst[nDst:], r) 284 | } 285 | return nDst, nSrc, err 286 | } 287 | 288 | type nopTransformer struct{ transform.NopResetter } 289 | 290 | func (nopTransformer) Transform(dst []byte, src []byte, atEOF bool) (int, int, error) { 291 | return 0, len(src), nil 292 | } 293 | -------------------------------------------------------------------------------- /fuzzy/fuzzy_test.go: -------------------------------------------------------------------------------- 1 | package fuzzy 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | "strings" 7 | "testing" 8 | ) 9 | 10 | const deBelloGallico = `All Gaul is divided into three parts, one of which the Belgae inhabit, 11 | the Aquitani another, those who in their own language are called Celts, in our Gauls, the third. 12 | All these differ from each other in language, customs and laws. The river Garonne separates the 13 | Gauls from the Aquitani; the Marne and the Seine separate them from the Belgae. Of all these, 14 | the Belgae are the bravest, because they are furthest from the civilization and refinement of 15 | [our] Province, and merchants least frequently resort to them, and import those things which tend 16 | to effeminate the mind; and they are the nearest to the Germans, who dwell beyond the Rhine, 17 | with whom they are continually waging war; for which reason the Helvetii also surpass the rest 18 | of the Gauls in valor, as they contend with the Germans in almost daily battles, when they either 19 | repel them from their own territories, or themselves wage war on their frontiers. One part of 20 | these, which it has been said that the Gauls occupy, takes its beginning at the river Rhone; 21 | it is bounded by the river Garonne, the ocean, and the territories of the Belgae; it borders, 22 | too, on the side of the Sequani and the Helvetii, upon the river Rhine, and stretches toward the 23 | north. The Belgae rises from the extreme frontier of Gaul, extend to the lower part of the river 24 | Rhine; and look toward the north and the rising sun. Aquitania extends from the river Garonne to 25 | the Pyrenaean mountains and to that part of the ocean which is near Spain: it looks between the 26 | setting of the sun, and the north star.` 27 | 28 | type fuzzyTest struct { 29 | source string 30 | target string 31 | wanted bool 32 | rank int 33 | } 34 | 35 | var fuzzyTests = []fuzzyTest{ 36 | {"zazz", deBelloGallico + " zazz", true, 1544}, 37 | {"zazz", "zazz " + deBelloGallico, true, 1544}, 38 | {"twl", "cartwheel", true, 6}, 39 | {"cart", "cartwheel", true, 5}, 40 | {"cw", "cartwheel", true, 7}, 41 | {"ee", "cartwheel", true, 7}, 42 | {"art", "cartwheel", true, 6}, 43 | {"eeel", "cartwheel", false, -1}, 44 | {"dog", "cartwheel", false, -1}, 45 | {"ёлка", "ёлочка", true, 2}, 46 | {"ветер", "ёлочка", false, -1}, 47 | {"中国", "中华人民共和国", true, 5}, 48 | {"日本", "中华人民共和国", false, -1}, 49 | {"イ", "イカ", true, 1}, 50 | {"limón", "limon", false, -1}, 51 | {"kitten", "setting", false, -1}, 52 | {"\xffinvalid UTF-8\xff", "", false, -1}, // invalid UTF-8 53 | {"Ⱦ", "", false, -1}, // uppercase and lowercase runes have different UTF-8 encoding lengths 54 | } 55 | 56 | func TestFuzzyMatch(t *testing.T) { 57 | for _, val := range fuzzyTests { 58 | match := Match(val.source, val.target) 59 | if match != val.wanted { 60 | t.Errorf("%s in %s expected match to be %t, got %t", 61 | val.source, val.target, val.wanted, match) 62 | } 63 | } 64 | } 65 | 66 | func TestFuzzyMatchFold(t *testing.T) { 67 | for _, val := range fuzzyTests { 68 | match := MatchFold(val.source, strings.ToUpper(val.target)) 69 | if match != val.wanted { 70 | t.Errorf("%s in %s expected match to be %t, got %t", 71 | val.source, strings.ToUpper(val.target), val.wanted, match) 72 | } 73 | } 74 | } 75 | 76 | func TestFuzzyMatchNormalized(t *testing.T) { 77 | var normalizedTests = []struct { 78 | source string 79 | target string 80 | wanted bool 81 | }{ 82 | {"limon", "limón", true}, 83 | {"limón", "limon tart", true}, 84 | {"limón", "LiMóN tArT", false}, 85 | {"limón", "LeMoN tArT", false}, 86 | } 87 | 88 | for _, val := range normalizedTests { 89 | match := MatchNormalized(val.source, val.target) 90 | if match != val.wanted { 91 | t.Errorf("%s in %s expected match to be %t, got %t", 92 | val.source, val.target, val.wanted, match) 93 | } 94 | } 95 | } 96 | 97 | func TestFuzzyMatchNormalizedFold(t *testing.T) { 98 | var normalizedTests = []struct { 99 | source string 100 | target string 101 | wanted bool 102 | }{ 103 | {"limon", "limón", true}, 104 | {"limón", "limon tart", true}, 105 | {"limón", "LiMóN tArT", true}, 106 | {"limón", "LeMoN tArT", false}, 107 | } 108 | 109 | for _, val := range normalizedTests { 110 | match := MatchNormalizedFold(val.source, val.target) 111 | if match != val.wanted { 112 | t.Errorf("%s in %s expected match to be %t, got %t", 113 | val.source, val.target, val.wanted, match) 114 | } 115 | } 116 | } 117 | 118 | func TestFuzzyFind(t *testing.T) { 119 | target := []string{"cartwheel", "foobar", "wheel", "baz", "cartwhéél"} 120 | wanted := []string{"cartwheel", "wheel"} 121 | 122 | matches := Find("whel", target) 123 | 124 | if len(matches) != len(wanted) { 125 | t.Errorf("expected %s, got %s", wanted, matches) 126 | } 127 | 128 | for i := range wanted { 129 | if wanted[i] != matches[i] { 130 | t.Errorf("expected %s, got %s", wanted, matches) 131 | } 132 | } 133 | } 134 | 135 | func TestFuzzyFindNormalized(t *testing.T) { 136 | target := []string{"cartwheel", "foobar", "wheel", "baz", "cartwhéél", "WHEEL"} 137 | wanted := []string{"cartwheel", "wheel", "cartwhéél"} 138 | 139 | matches := FindNormalized("whél", target) 140 | 141 | if len(matches) != len(wanted) { 142 | t.Errorf("expected %s, got %s", wanted, matches) 143 | } 144 | 145 | for i := range wanted { 146 | if wanted[i] != matches[i] { 147 | t.Errorf("expected %s, got %s", wanted, matches) 148 | } 149 | } 150 | } 151 | 152 | func TestFuzzyFindNormalizedFold(t *testing.T) { 153 | target := []string{"cartwheel", "foobar", "wheel", "baz", "cartwhéél", "WHEEL"} 154 | wanted := []string{"cartwheel", "wheel", "cartwhéél", "WHEEL"} 155 | 156 | matches := FindNormalizedFold("whél", target) 157 | 158 | if len(matches) != len(wanted) { 159 | t.Errorf("expected %s, got %s", wanted, matches) 160 | } 161 | 162 | for i := range wanted { 163 | if wanted[i] != matches[i] { 164 | t.Errorf("expected %s, got %s", wanted, matches) 165 | } 166 | } 167 | } 168 | 169 | func TestRankMatch(t *testing.T) { 170 | for _, val := range fuzzyTests { 171 | rank := RankMatch(val.source, val.target) 172 | if rank != val.rank { 173 | t.Errorf("expected ranking %d, got %d for %s in %s", 174 | val.rank, rank, val.source, val.target) 175 | } 176 | } 177 | } 178 | 179 | func TestRankMatchNormalized(t *testing.T) { 180 | var fuzzyTests = []struct { 181 | source string 182 | target string 183 | rank int 184 | }{ 185 | {"limó", "limon", 1}, 186 | {"limó", "limon", 1}, 187 | {"limó", "LIMON", -1}, 188 | } 189 | 190 | for _, val := range fuzzyTests { 191 | rank := RankMatchNormalized(val.source, val.target) 192 | if rank != val.rank { 193 | t.Errorf("expected ranking %d, got %d for %s in %s", 194 | val.rank, rank, val.source, val.target) 195 | } 196 | } 197 | } 198 | 199 | func TestRankMatchNormalizedFold(t *testing.T) { 200 | var fuzzyTests = []struct { 201 | source string 202 | target string 203 | rank int 204 | }{ 205 | {"limó", "limon", 1}, 206 | {"limó", "limon", 1}, 207 | {"limó", "LIMON", 1}, 208 | {"limó", "LIMON TART", 6}, 209 | } 210 | 211 | for _, val := range fuzzyTests { 212 | rank := RankMatchNormalizedFold(val.source, val.target) 213 | if rank != val.rank { 214 | t.Errorf("expected ranking %d, got %d for %s in %s", 215 | val.rank, rank, val.source, val.target) 216 | } 217 | } 218 | } 219 | 220 | func TestRankMatchNormalizedFoldConcurrent(t *testing.T) { 221 | target := strings.Split("Lorem ipsum dolor sit amet, consectetur adipiscing elit", " ") 222 | source := "ips" 223 | procs := 10 224 | iter := 10 225 | type empty struct{} 226 | done := make(chan empty) 227 | for i := 0; i <= procs; i++ { 228 | go func() { 229 | for n := 0; n < iter; n++ { 230 | _ = RankFindNormalizedFold(source, target) 231 | } 232 | done <- empty{} 233 | }() 234 | } 235 | cnt := 0 236 | for i := 0; i < procs; i++ { 237 | <-done 238 | cnt++ 239 | } 240 | } 241 | 242 | func TestRankFind(t *testing.T) { 243 | target := []string{"cartwheel", "foobar", "wheel", "baz"} 244 | wanted := []Rank{ 245 | {"whl", "cartwheel", 6, 0}, 246 | {"whl", "wheel", 2, 2}, 247 | } 248 | 249 | ranks := RankFind("whl", target) 250 | 251 | if len(ranks) != len(wanted) { 252 | t.Errorf("expected %+v, got %+v", wanted, ranks) 253 | } 254 | 255 | for i := range wanted { 256 | if wanted[i] != ranks[i] { 257 | t.Errorf("expected %+v, got %+v", wanted, ranks) 258 | } 259 | } 260 | } 261 | 262 | func TestRankFindNormalized(t *testing.T) { 263 | target := []string{"limón", "limon", "lemon", "LIMON"} 264 | wanted := []Rank{ 265 | {"limó", "limón", 1, 0}, 266 | {"limó", "limon", 2, 1}, 267 | } 268 | 269 | ranks := RankFindNormalized("limó", target) 270 | 271 | if len(ranks) != len(wanted) { 272 | t.Errorf("expected %+v, got %+v", wanted, ranks) 273 | } 274 | 275 | for i := range wanted { 276 | if wanted[i] != ranks[i] { 277 | t.Errorf("expected %+v, got %+v", wanted, ranks) 278 | } 279 | } 280 | } 281 | 282 | func TestRankFindNormalizedFold(t *testing.T) { 283 | target := []string{"limón", "limon", "lemon", "LIMON"} 284 | wanted := []Rank{ 285 | {"limó", "limón", 1, 0}, 286 | {"limó", "limon", 2, 1}, 287 | {"limó", "LIMON", 5, 3}, 288 | } 289 | 290 | ranks := RankFindNormalizedFold("limó", target) 291 | 292 | if len(ranks) != len(wanted) { 293 | t.Errorf("expected %+v, got %+v", wanted, ranks) 294 | } 295 | 296 | for i := range wanted { 297 | if wanted[i] != ranks[i] { 298 | t.Errorf("expected %+v, got %+v", wanted, ranks) 299 | } 300 | } 301 | } 302 | 303 | func TestSortingRanks(t *testing.T) { 304 | rs := Ranks{{"a", "b", 1, 0}, {"a", "cc", 2, 1}, {"a", "a", 0, 2}} 305 | wanted := Ranks{rs[2], rs[0], rs[1]} 306 | 307 | sort.Sort(rs) 308 | 309 | for i := range wanted { 310 | if wanted[i] != rs[i] { 311 | t.Errorf("expected %+v, got %+v", wanted, rs) 312 | } 313 | } 314 | } 315 | 316 | func BenchmarkMatch(b *testing.B) { 317 | ft := fuzzyTests[2] 318 | for i := 0; i < b.N; i++ { 319 | Match(ft.source, ft.target) 320 | } 321 | } 322 | 323 | func BenchmarkMatchBigLate(b *testing.B) { 324 | ft := fuzzyTests[0] 325 | for i := 0; i < b.N; i++ { 326 | Match(ft.source, ft.target) 327 | } 328 | } 329 | 330 | func BenchmarkMatchBigEarly(b *testing.B) { 331 | ft := fuzzyTests[1] 332 | for i := 0; i < b.N; i++ { 333 | Match(ft.source, ft.target) 334 | } 335 | } 336 | 337 | func BenchmarkMatchFold(b *testing.B) { 338 | ft := fuzzyTests[2] 339 | for i := 0; i < b.N; i++ { 340 | MatchFold(ft.source, ft.target) 341 | } 342 | } 343 | 344 | func BenchmarkMatchFoldBigLate(b *testing.B) { 345 | ft := fuzzyTests[0] 346 | for i := 0; i < b.N; i++ { 347 | MatchFold(ft.source, ft.target) 348 | } 349 | } 350 | 351 | func BenchmarkMatchFoldBigEarly(b *testing.B) { 352 | ft := fuzzyTests[1] 353 | for i := 0; i < b.N; i++ { 354 | MatchFold(ft.source, ft.target) 355 | } 356 | } 357 | 358 | func BenchmarkFindFold(b *testing.B) { 359 | b.Run("Plain", func(b *testing.B) { benchmarkFindFold(b, fuzzyTests[2]) }) 360 | b.Run("BigLate", func(b *testing.B) { benchmarkFindFold(b, fuzzyTests[0]) }) 361 | b.Run("BigEarly", func(b *testing.B) { benchmarkFindFold(b, fuzzyTests[1]) }) 362 | } 363 | 364 | func benchmarkFindFold(b *testing.B, ft fuzzyTest) { 365 | src := ft.source 366 | var tgts []string 367 | for i := 0; i < 128; i++ { 368 | tgts = append(tgts, ft.target) 369 | } 370 | b.ResetTimer() 371 | for i := 0; i < b.N; i++ { 372 | FindFold(src, tgts) 373 | } 374 | } 375 | 376 | func BenchmarkRankMatch(b *testing.B) { 377 | ft := fuzzyTests[2] 378 | for i := 0; i < b.N; i++ { 379 | RankMatch(ft.source, ft.target) 380 | } 381 | } 382 | 383 | func BenchmarkRankMatchBigLate(b *testing.B) { 384 | ft := fuzzyTests[0] 385 | for i := 0; i < b.N; i++ { 386 | RankMatch(ft.source, ft.target) 387 | } 388 | } 389 | 390 | func BenchmarkRankMatchBigEarly(b *testing.B) { 391 | ft := fuzzyTests[1] 392 | for i := 0; i < b.N; i++ { 393 | RankMatch(ft.source, ft.target) 394 | } 395 | } 396 | 397 | func ExampleMatch() { 398 | fmt.Print(Match("twl", "cartwheel")) 399 | // Output: true 400 | } 401 | 402 | func ExampleFind() { 403 | fmt.Print(Find("whl", []string{"cartwheel", "foobar", "wheel", "baz"})) 404 | // Output: [cartwheel wheel] 405 | } 406 | 407 | func ExampleRankMatch() { 408 | fmt.Print(RankMatch("twl", "cartwheel")) 409 | // Output: 6 410 | } 411 | 412 | func ExampleRankFind() { 413 | fmt.Printf("%+v", RankFind("whl", []string{"cartwheel", "foobar", "wheel", "baz"})) 414 | // Output: [{Source:whl Target:cartwheel Distance:6 OriginalIndex:0} {Source:whl Target:wheel Distance:2 OriginalIndex:2}] 415 | } 416 | --------------------------------------------------------------------------------