├── .github └── workflows │ └── main.yaml ├── LICENSE.txt ├── README.md ├── fuzzy.nimble ├── src └── fuzzy.nim └── tests ├── config.nims └── test1.nim /.github/workflows/main.yaml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: 4 | pull_request: 5 | push: 6 | schedule: 7 | - cron: '0 0 * * 1' 8 | 9 | jobs: 10 | tests: 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | nimversion: 15 | - stable 16 | - 1.4.2 17 | - 1.2.8 18 | os: 19 | - ubuntu-latest 20 | - macOS-latest 21 | - windows-latest 22 | steps: 23 | - uses: actions/checkout@v1 24 | - uses: iffy/install-nim@v3 25 | with: 26 | version: ${{ matrix.nimversion }} 27 | - name: Test 28 | run: | 29 | nimble install -y 30 | nimble test 31 | nimble refresh 32 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is furnished 8 | to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice (including the next 11 | paragraph) shall be included in all copies or substantial portions of the 12 | Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS 17 | OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19 | OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Fuzzy search library for nim 2 | 3 | There are two important methods `fuzzyMatch` and `fuzzyMatchSmart`. 4 | 5 | 6 | the usage is really straight forward: 7 | 8 | ``` nim 9 | var s1 = "foo bar baz" 10 | var s2 = "bAz" 11 | var s3 = "fobz" 12 | var s4 = "bra" 13 | 14 | echo fuzzyMatchSmart(s1, s2) # => 1.0 15 | echo fuzzyMatchSmart(s1, s3) # => 0.5 16 | echo fuzzyMatchSmart(s1, s4) # => 0.6 17 | 18 | `fuzzyMatchSmart` tries to be smart about the strings so it does: 19 | - lowercase whole string 20 | - sorts substrings splitted by `" "` 21 | - best matching substring of the length of the shorter one 22 | ``` 23 | -------------------------------------------------------------------------------- /fuzzy.nimble: -------------------------------------------------------------------------------- 1 | # Package 2 | 3 | version = "0.1.0" 4 | author = "Jedrzej Nowak" 5 | description = "Pure nim fuzzy search implementation. Supports substrings etc" 6 | license = "MIT" 7 | srcDir = "src" 8 | 9 | 10 | # Dependencies 11 | 12 | requires "nim >= 1.2.8" 13 | -------------------------------------------------------------------------------- /src/fuzzy.nim: -------------------------------------------------------------------------------- 1 | import strutils 2 | from algorithm import sorted 3 | # from std/editdistance import editDistanceAscii # stdlib one yields correct distance BUT for ratio cost should be higher because yields better results (Python does that too) 4 | 5 | proc levenshtein_ratio_and_distance*(s, t: string, ratio_calc = true): float = 6 | ## This should be very similar to python implementation 7 | ## Calculates ratio and distance depending on `ratio_calc` 8 | let rows = s.len + 1 9 | let cols = t.len + 1 10 | var distance: seq[seq[int]] 11 | var cost: int 12 | distance = newSeq[seq[int]](rows) 13 | for i in 0 ..< rows: 14 | distance[i] = newSeq[int](cols) 15 | for i in 1 ..< rows: 16 | for k in 1 ..< cols: 17 | distance[i][0] = i 18 | distance[0][k] = k 19 | 20 | for col in 1 ..< cols: 21 | for row in 1 ..< rows: 22 | if s[row - 1] == t[col - 1]: 23 | cost = 0 24 | else: 25 | if ratio_calc: 26 | cost = 2 27 | else: 28 | cost = 1 29 | distance[row][col] = min(min(distance[row-1][col] + 1, 30 | distance[row][col - 1] + 1), 31 | distance[row-1][col - 1] + cost) 32 | let dst = distance[rows - 1][cols - 1] 33 | if ratio_calc: 34 | # echo s, " - ", t, " = ", $(((s.len + t.len) - dst).float / (s.len + t.len).float) 35 | return ((s.len + t.len) - dst).float / (s.len + t.len).float 36 | else: 37 | return dst.float 38 | 39 | 40 | # proc levenshtein_ratio_and_distance*(s, t: string, ratio_calc=true): float = 41 | # ## stdlib distance is suboptimal for that case because here we mostly need 42 | # ## cost = 2 43 | # var dst = editDistanceAscii(s, t) 44 | # if ratio_calc: 45 | # return ((s.len + t.len) - dst).float / (s.len + t.len).float 46 | # return dst.float 47 | 48 | 49 | proc fuzzyMatch*(s1, s2: string): float = 50 | ## Just basic fuzzy match 51 | ## Could be used as a base for other algorithms 52 | if s1.len > s2.len: 53 | return levenshtein_ratio_and_distance(s2, s1, ratio_calc = true) 54 | return levenshtein_ratio_and_distance(s1, s2, ratio_calc = true) 55 | 56 | 57 | proc fuzzyMatchSmart*(s1, s2: string, withSubstring = true): float = 58 | ##Tries to be smart about the strings so: 59 | ## - lowercase 60 | ## - sorts substrings 61 | ## - best matching substring of length of shorter one 62 | var str1: string 63 | var str2: string 64 | str1 = s1.toLower 65 | str2 = s2.toLower 66 | str1 = str1.split(" ").sorted().join(" ") 67 | str2 = str2.split(" ").sorted().join(" ") 68 | if str1 == str2: 69 | return 1.0 70 | if str1.len == str2.len: 71 | return fuzzyMatch(str1, str2) 72 | var shorter, longer: string 73 | if str1.len < str2.len: 74 | shorter = str1 75 | longer = str2 76 | else: 77 | shorter = str2 78 | longer = str1 79 | var tmpRes = fuzzyMatch(shorter, longer) 80 | if withSubstring: 81 | let lengthDiff = longer.len - shorter.len 82 | var subMatch: float 83 | for i in 0 .. lengthDiff: 84 | subMatch = fuzzyMatch(shorter, longer[i ..< i + shorter.len]) 85 | tmpRes = max(tmpRes, subMatch) 86 | return tmpRes 87 | -------------------------------------------------------------------------------- /tests/config.nims: -------------------------------------------------------------------------------- 1 | switch("path", "$projectDir/../src") -------------------------------------------------------------------------------- /tests/test1.nim: -------------------------------------------------------------------------------- 1 | import fuzzy 2 | 3 | # Test fuzzy 4 | block: 5 | var s1 = "fooBar" 6 | var s2 = "foobar" 7 | var s3 = "fobar" 8 | var s4 = "other" 9 | var s5 = "foo bar" 10 | 11 | block: 12 | var result = levenshtein_ratio_and_distance(s1, s2, false) 13 | doAssert result == 1, $result 14 | 15 | result = levenshtein_ratio_and_distance(s1, s3, false) 16 | doAssert result == 2, $result 17 | 18 | block: 19 | var result1 = levenshtein_ratio_and_distance(s1, s2, true) 20 | var result2 = levenshtein_ratio_and_distance(s1, s3, true) 21 | var result3 = levenshtein_ratio_and_distance(s1, s4, true) 22 | doAssert result1 != 1 23 | doAssert result2 != 1 24 | doAssert result3 < result1, $result3 25 | 26 | 27 | block: 28 | var s1 = "foo bar baz" 29 | var s2 = "fobz" 30 | 31 | var result1 = levenshtein_ratio_and_distance(s1, s2, true) 32 | doAssert result1 > 0.5, $result1 33 | 34 | block: 35 | doAssert fuzzyMatch(s1, s4) < 0.7, $fuzzyMatch(s1, s4) 36 | doAssert fuzzyMatch(s1, s2) > 0.8, $fuzzyMatch(s1, s2) 37 | doAssert fuzzyMatch(s1, s5) > 0.7, $fuzzyMatch(s1, s5) 38 | 39 | 40 | block: 41 | var s1 = "bazz" 42 | var s2 = "bAz" 43 | var s3 = "baz" 44 | 45 | doAssert fuzzyMatchSmart(s1, s2) == fuzzyMatchSmart(s1, s3) 46 | doAssert fuzzyMatchSmart(s1, s2) == fuzzyMatchSmart(s2, s3) 47 | 48 | 49 | block: 50 | var s1 = "Some very long sentence with spaces and other stuff" 51 | var s2 = "and" 52 | var s3 = "with sentence" 53 | var s4 = "other long stuff" 54 | 55 | doAssert fuzzyMatchSmart(s1, s2) == 1, $fuzzyMatchSmart(s1, s2) 56 | doAssert fuzzyMatchSmart(s1, s3) > 0.5, $fuzzyMatchSmart(s1, s3) 57 | doAssert fuzzyMatchSmart(s1, s4) > 0.8, $fuzzyMatchSmart(s1, s4) 58 | --------------------------------------------------------------------------------