├── .github └── workflows │ └── rust.yml ├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── benches └── bench.rs └── src ├── lib.rs ├── main.rs ├── matching.rs ├── parsing.rs ├── scoring.rs └── search.rs /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust build 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v2 11 | - name: Install rust 12 | uses: actions-rs/toolchain@v1 13 | with: 14 | toolchain: stable 15 | 16 | - name: Build 17 | uses: actions-rs/cargo@v1 18 | with: 19 | command: build 20 | args: --release --all-features 21 | - name: Test 22 | uses: actions-rs/cargo@v1 23 | with: 24 | command: test 25 | args: --all-features 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | **/*.rs.bk 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sublime_fuzzy" 3 | version = "0.7.0" 4 | authors = ["Benedikt Schatz "] 5 | description = "Fuzzy matching algorithm based on Sublime Text's string search." 6 | repository = "https://github.com/Schlechtwetterfront/fuzzy-rs" 7 | readme = "README.md" 8 | keywords = ["fuzzy", "match", "search", "text"] 9 | categories = ["text-processing", "algorithms"] 10 | license-file = "LICENSE" 11 | 12 | [lib] 13 | path = "src/lib.rs" 14 | 15 | [[bin]] 16 | name = "sfz" 17 | path = "src/main.rs" 18 | 19 | [dependencies] 20 | serde_derive = { version = "1.0.67", optional = true } 21 | serde = { version = "1.0.67", optional = true } 22 | 23 | [features] 24 | serde_support = ["serde", "serde_derive"] 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # sublime_fuzzy [![sublime_fuzzy on crates.io](https://img.shields.io/crates/v/sublime_fuzzy.svg)](https://crates.io/crates/sublime_fuzzy)
2 | 
3 | Fuzzy matching algorithm based on Sublime Text's string search. Iterates through
4 | characters of a search string and calculates a score.
5 | 
6 | The score is based on several factors:
7 | 
8 | - **Word starts** like the `t` in `some_thing` get a bonus (`bonus_word_start`)
9 | - **Consecutive matches** get an accumulative bonus for every consecutive match (`bonus_consecutive`)
10 | - Matches that also match **case** (`T` -> `T` instead of `t` -> `T`) during a case-insensitive search get a bonus (`bonus_match_case`)
11 | - The **distance** between two matches will be multiplied by the `penalty_distance` penalty and subtracted from the score
12 | 
13 | The default scoring is configured to give a lot of weight to word starts. So a pattern `scc` will match
14 | **S**occer**C**artoon**C**ontroller, not **S**o**cc**erCartoonController.
15 | 
16 | # Match Examples
17 | 
18 | With default weighting.
19 | 
20 | | Pattern     | Target string             | Result                              |
21 | | ----------- | ------------------------- | ----------------------------------- |
22 | | `scc`       | `SoccerCartoonController` | **S**occer**C**artoon**C**ontroller |
23 | | `something` | `some search thing`       | **some** search **thing**           |
24 | 
25 | # Usage
26 | 
27 | Basic usage:
28 | 
29 | ```rust
30 | use sublime_fuzzy::best_match;
31 | 
32 | let result = best_match("something", "some search thing");
33 | 
34 | assert!(result.is_some());
35 | ```
36 | 
37 | `Match::continuous_matches` returns an iterator over consecutive matches. Based on those, the input
38 | string can be formatted.
39 | 
40 | `format_simple` provides a simple formatting that wraps matches in tags:
41 | 
42 | ```rust
43 | use sublime_fuzzy::{best_match, format_simple};
44 | 
45 | let target = "some search thing";
46 | 
47 | let result = best_match("something", target).unwrap();
48 | 
49 | assert_eq!(
50 |     format_simple(&result, target, "<b>", "</b>"),
51 |     "<b>some</b> search <b>thing</b>"
52 | );
53 | ```
54 | 
55 | The weighting of the different factors can be adjusted:
56 | 
57 | ```rust
58 | use sublime_fuzzy::{FuzzySearch, Scoring};
59 | 
60 | // Or pick from one of the provided `Scoring::...` methods like `emphasize_word_starts`
61 | let scoring = Scoring {
62 |     bonus_consecutive: 128,
63 |     bonus_word_start: 0,
64 |     ..Scoring::default()
65 | };
66 | 
67 | let result = FuzzySearch::new("something", "some search thing")
68 |     .case_sensitive()
69 |     .score_with(&scoring)
70 |     .best_match();
71 | 
72 | assert!(result.is_some())
73 | ```
74 | 
75 | **Note:** Any whitespace in the pattern (`'something'`
76 | in the examples above) will be removed.
77 | 
78 | ### Documentation
79 | 
80 | Check out the documentation at [docs.rs](https://docs.rs/sublime_fuzzy/).
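### Custom formatting

For custom highlighting the matched runs can also be consumed directly. This is a minimal sketch using the
public `continuous_matches`, `start` and `len` accessors; the expected runs mirror the
`word_starts_count_more` test in `src/lib.rs`:

```rust
use sublime_fuzzy::best_match;

let target = "some search thing";
let m = best_match("something", target).unwrap();

// Each `ContinuousMatch` is one run of consecutively matched chars,
// collected here as a (start, length) pair of char indices into the target.
let runs: Vec<(usize, usize)> = m
    .continuous_matches()
    .map(|c| (c.start(), c.len()))
    .collect();

assert_eq!(runs, vec![(0, 4), (12, 5)]);
```

For a feel of the default weights in `src/scoring.rs` (`bonus_word_start: 72`, `penalty_distance: 4`): the
documented score of `172` for `best_match("scc", "SoccerCartoonController")` is three word-start bonuses
minus the penalties for the two gaps between matches, `3 * 72 - (6 - 1) * 4 - (7 - 1) * 4 = 172`.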
81 | -------------------------------------------------------------------------------- /benches/bench.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | extern crate sublime_fuzzy; 3 | extern crate test; 4 | 5 | use sublime_fuzzy::{best_match, format_simple}; 6 | use test::Bencher; 7 | 8 | #[bench] 9 | fn empty(b: &mut Bencher) { 10 | b.iter(|| 1); 11 | } 12 | 13 | #[bench] 14 | fn short(b: &mut Bencher) { 15 | b.iter(|| { 16 | best_match("jelly", "jellyfish"); 17 | }) 18 | } 19 | 20 | #[bench] 21 | fn url(b: &mut Bencher) { 22 | b.iter(|| best_match( 23 | "services", 24 | "https://some-domain.io/api/tenant/1/group/some-group/setup/c4b158c3-047f-48d8-8f7a-8ac20d20460b/lists/services/?before=2020-01-01" 25 | )); 26 | } 27 | 28 | #[bench] 29 | fn url_format(b: &mut Bencher) { 30 | b.iter(|| { 31 | let t = "https://some-domain.io/api/tenant/1/group/some-group/setup/c4b158c3-047f-48d8-8f7a-8ac20d20460b/lists/services/?before=2020-01-01"; 32 | 33 | format_simple(&best_match("services", t).unwrap(), t, "", ""); 34 | }) 35 | } 36 | 37 | #[bench] 38 | fn medium_start(b: &mut Bencher) { 39 | b.iter(|| best_match( 40 | "tracking", 41 | "This is a tracking issue for the #[bench] attribute and its stability in the compiler. Currently it is not possible to use this from stable Rust as it requires extern crate test which is itself not stable." 42 | )); 43 | } 44 | 45 | #[bench] 46 | fn medium_middle(b: &mut Bencher) { 47 | b.iter(|| best_match( 48 | "requires", 49 | "This is a tracking issue for the #[bench] attribute and its stability in the compiler. Currently it is not possible to use this from stable Rust as it requires extern crate test which is itself not stable." 50 | )); 51 | } 52 | 53 | #[bench] 54 | fn medium_end(b: &mut Bencher) { 55 | b.iter(|| best_match( 56 | "itself", 57 | "This is a tracking issue for the #[bench] attribute and its stability in the compiler. Currently it is not possible to use this from stable Rust as it requires extern crate test which is itself not stable." 58 | )); 59 | } 60 | 61 | #[bench] 62 | fn long_start_close(b: &mut Bencher) { 63 | b.iter(|| { 64 | best_match( 65 | "empty baseline", 66 | r"The empty benchmark is there as a baseline. An anecdote: In my first 67 | compilation of the benchmark, I forgot to add -O to the rustc command 68 | line, and wound up with a few ns/iter on an empty benchmark. Thus, I 69 | now always have an empty benchmark in my list, to make sure I benchmark 70 | an optimized version.", 71 | ) 72 | }); 73 | } 74 | 75 | #[bench] 76 | fn long_middle_close(b: &mut Bencher) { 77 | b.iter(|| { 78 | best_match( 79 | "rustc wound", 80 | r"The empty benchmark is there as a baseline. An anecdote: In my first 81 | compilation of the benchmark, I forgot to add -O to the rustc command 82 | line, and wound up with a few ns/iter on an empty benchmark. Thus, I 83 | now always have an empty benchmark in my list, to make sure I benchmark 84 | an optimized version.", 85 | ) 86 | }); 87 | } 88 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Fuzzy matching algorithm based on Sublime Text's string search. Iterates through 2 | //! characters of a search string and calculates a score. 3 | //! 4 | //! The score is based on several factors: 5 | //! * **Word starts** like the `t` in `some_thing` get a bonus (`bonus_word_start`) 6 | //! 
* **Consecutive matches** get an accumulative bonus for every consecutive match (`bonus_consecutive`)
7 | //! * Matches that also match **case** (`T` -> `T` instead of `t` -> `T`) during a case-insensitive search get a bonus (`bonus_match_case`)
8 | //! * The **distance** between two matches will be multiplied by the `penalty_distance` penalty and subtracted from the score
9 | //!
10 | //! The default scoring is configured to give a lot of weight to word starts. So a pattern `scc` will match
11 | //! **S**occer**C**artoon**C**ontroller, not **S**o**cc**erCartoonController.
12 | //!
13 | //! # Match Examples
14 | //!
15 | //! With default weighting.
16 | //!
17 | //! | Pattern | Target string | Result
18 | //! | --- | --- | ---
19 | //! | `scc` | `SoccerCartoonController` | **S**occer**C**artoon**C**ontroller
20 | //! | `something` | `some search thing` | **some** search **thing**
21 | //!
22 | //! # Usage
23 | //!
24 | //! Basic usage:
25 | //!
26 | //! ```rust
27 | //! use sublime_fuzzy::best_match;
28 | //!
29 | //! let result = best_match("something", "some search thing");
30 | //!
31 | //! assert!(result.is_some());
32 | //! ```
33 | //!
34 | //! [`Match::continuous_matches`] returns an iterator over consecutive matches. Based on those, the input
35 | //! string can be formatted.
36 | //!
37 | //! [`format_simple`] provides a simple formatting that wraps matches in tags:
38 | //!
39 | //! ```rust
40 | //! use sublime_fuzzy::{best_match, format_simple};
41 | //!
42 | //! let target = "some search thing";
43 | //!
44 | //! let result = best_match("something", target).unwrap();
45 | //!
46 | //! assert_eq!(
47 | //!     format_simple(&result, target, "<b>", "</b>"),
48 | //!     "<b>some</b> search <b>thing</b>"
49 | //! );
50 | //! ```
51 | //!
52 | //! The weighting of the different factors can be adjusted:
53 | //!
54 | //! ```rust
55 | //! use sublime_fuzzy::{FuzzySearch, Scoring};
56 | //!
57 | //! // Or pick from one of the provided `Scoring::...` methods like `emphasize_word_starts`
58 | //! let scoring = Scoring {
59 | //!     bonus_consecutive: 128,
60 | //!     bonus_word_start: 0,
61 | //!     ..Scoring::default()
62 | //! };
63 | //!
64 | //! let result = FuzzySearch::new("something", "some search thing")
65 | //!     .case_sensitive()
66 | //!     .score_with(&scoring)
67 | //!     .best_match();
68 | //!
69 | //! assert!(result.is_some())
70 | //! ```
71 | //!
72 | //! **Note:** Any whitespace in the pattern (`'something'`
73 | //! in the examples above) will be removed.
74 | //!
75 | #[cfg(feature = "serde_support")]
76 | extern crate serde;
77 | #[cfg(feature = "serde_support")]
78 | #[macro_use]
79 | extern crate serde_derive;
80 | 
81 | mod matching;
82 | mod parsing;
83 | mod scoring;
84 | mod search;
85 | 
86 | pub use matching::{ContinuousMatch, ContinuousMatches, Match};
87 | pub use scoring::Scoring;
88 | pub use search::FuzzySearch;
89 | 
90 | /// Returns the best match for `query` in the target string `target`.
91 | ///
92 | /// Always tries to match the _full_ pattern. A partial match is considered
93 | /// invalid and will return [`None`]. Will also return [`None`] in case `query` or
94 | /// `target` are empty.
95 | ///
96 | /// Note that whitespace in query will be _ignored_.
97 | ///
98 | /// # Examples
99 | ///
100 | /// Basic usage:
101 | ///
102 | /// ```rust
103 | /// use sublime_fuzzy::{best_match, Scoring};
104 | ///
105 | /// let m = best_match("scc", "SoccerCartoonController")
106 | ///     .expect("No match");
107 | ///
108 | /// assert_eq!(m.matched_indices().len(), 3);
109 | /// assert_eq!(m.score(), 172);
110 | /// ```
111 | ///
112 | pub fn best_match(query: &str, target: &str) -> Option<Match> {
113 |     FuzzySearch::new(query, target)
114 |         .case_insensitive()
115 |         .best_match()
116 | }
117 | 
118 | /// Formats a [`Match`] by wrapping each continuous group of matched chars in
119 | /// `before` and `after`.
120 | ///
121 | /// # Examples
122 | ///
123 | /// Basic usage:
124 | ///
125 | /// ```rust
126 | /// use sublime_fuzzy::{best_match, format_simple};
127 | ///
128 | /// let target_string = "some search thing";
129 | /// let result = best_match("something", target_string).unwrap();
130 | ///
131 | /// assert_eq!(
132 | ///     format_simple(&result, target_string, "<b>", "</b>"),
133 | ///     "<b>some</b> search <b>thing</b>"
134 | /// );
135 | /// ```
136 | ///
137 | pub fn format_simple(match_: &Match, target: &str, before: &str, after: &str) -> String {
138 |     let str_before = before.to_owned();
139 |     let str_after = after.to_owned();
140 | 
141 |     let mut pieces = Vec::new();
142 | 
143 |     let mut last_end = 0;
144 | 
145 |     for c in match_.continuous_matches() {
146 |         // Piece between last match and this match
147 |         pieces.push(
148 |             target
149 |                 .chars()
150 |                 .skip(last_end)
151 |                 .take(c.start() - last_end)
152 |                 .collect::<String>(),
153 |         );
154 | 
155 |         pieces.push(str_before.clone());
156 | 
157 |         // This match
158 |         pieces.push(target.chars().skip(c.start()).take(c.len()).collect());
159 | 
160 |         pieces.push(str_after.clone());
161 | 
162 |         last_end = c.start() + c.len();
163 |     }
164 | 
165 |     // Leftover chars
166 |     if last_end != target.len() {
167 |         pieces.push(target.chars().skip(last_end).collect::<String>());
168 |     }
169 | 
170 |     pieces.join("")
171 | }
172 | 
173 | #[cfg(test)]
174 | mod tests {
175 |     use crate::{best_match, format_simple, matching::ContinuousMatch};
176 | 
177 |     #[test]
178 |     fn feature_serde() {
179 |         assert!(cfg!(feature = "serde_support"));
180 |     }
181 | 
182 |     #[test]
183 |     fn full_match() {
184 |         assert!(best_match("test", "test").is_some());
185 |     }
186 | 
187 |     #[test]
188 |     fn any_match() {
189 |         assert!(best_match("towers", "the two towers").is_some());
190 |     }
191 | 
192 |     #[test]
193 |     fn no_match() {
194 |         assert_eq!(best_match("abc", "def"), None);
195 |     }
196 | 
197 |     #[test]
198 |     fn basic() {
199 |         let r = best_match("scc", "soccer cartoon controller");
200 | 
201 |         assert!(r.is_some());
202 |     }
203 | 
204 |     #[test]
205 |     fn partial_match_none() {
206 |         assert_eq!(best_match("partial", "part"), None);
207 |     }
208 | 
209 |     #[test]
210 |     fn case_sensitivity() {
211 |         assert!(
212 |             best_match("ttt", "The Two Towers").is_some(),
213 |             "Lower query chars do not match upper target chars"
214 |         );
215 | 
216 |         assert!(
217 |             best_match("TTT", "The Two Towers").is_some(),
218 |             "Upper query chars do not match upper target chars"
219 |         );
220 | 
221 |         assert!(
222 |             best_match("TTT", "the two towers").is_some(),
223 |             "Upper query chars do not match lower target chars"
224 |         );
225 |     }
226 | 
227 |     #[test]
228 |     fn case_sensitivity_scoring() {
229 |         let non_case_match = best_match("ttt", "The Two Towers").unwrap();
230 |         let case_match = best_match("TTT", "The Two Towers").unwrap();
231 | 
232 |         assert!(non_case_match.score() < case_match.score());
233 |     }
234 | 
235 |     #[test]
236 |     fn whitespace() {
237 |         assert!(best_match("t t", "The Two Towers").is_some());
238 |     }
239 | 
240 |     #[test]
241 |     fn word_starts_count_more() {
242 |         let r = best_match("something", "some search thing");
243 | 
244 |         assert_eq!(
245 |             r.unwrap()
246 |                 .continuous_matches()
247 |                 .collect::<Vec<_>>(),
248 |             vec![ContinuousMatch::new(0, 4), ContinuousMatch::new(12, 5)]
249 |         );
250 |     }
251 | 
252 |     #[test]
253 |     fn word_starts_count_more_2() {
254 |         let m = best_match("scc", "SccsCoolController").unwrap();
255 | 
256 |         assert_eq!(
257 |             m.continuous_matches().collect::<Vec<_>>(),
258 |             vec![
259 |                 ContinuousMatch::new(0, 1),
260 |                 ContinuousMatch::new(4, 1),
261 |                 ContinuousMatch::new(8, 1)
262 |             ]
263 |         );
264 |     }
265 | 
266 |     #[test]
267 |     fn empty_query() {
268 |         assert_eq!(best_match("", "test"), None);
269 |     }
270 | 
271 |     #[test]
272 |     fn empty_target() {
273 |         assert_eq!(best_match("test", ""), None);
274 |     }
275 | 
276 |     #[test]
277 |     fn distance_to_first_is_ignored() {
278 |         let a = best_match("release", "some_release").unwrap();
279 |         let b = best_match("release", "a_release").unwrap();
280 | 
281 |         assert_eq!(a.score(), b.score());
282 |     }
283 | 
284 |     #[test]
285 |     fn matches_unicode() {
286 |         let m = best_match("👀", "🦀 👈 👀").unwrap();
287 | 
288 |         assert_eq!(
289 |             m.matched_indices().cloned().collect::<Vec<_>>(),
290 |             vec![4]
291 |         );
292 |     }
293 | 
294 |     #[test]
295 |     fn formats_unicode() {
296 |         let s = "🦀 👈 👀";
297 |         let m = best_match("👀", s).unwrap();
298 | 
299 |         assert_eq!(format_simple(&m, s, "<", ">"), "🦀 👈 <👀>");
300 |     }
301 | }
302 | 
--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
1 | //! Very basic binary for using/testing `sublime_fuzzy` from the command line.
2 | //!
3 | //! Pass the query as first and target string as second parameters to `sfz`.
4 | use std::env;
5 | 
6 | use sublime_fuzzy::{best_match, format_simple};
7 | 
8 | extern crate sublime_fuzzy;
9 | 
10 | fn main() {
11 |     let args = env::args().collect::<Vec<_>>();
12 | 
13 |     let q = args.get(1).expect("Missing query arg");
14 |     let s = args.get(2).expect("Missing target arg");
15 | 
16 |     if let Some(m) = best_match(q, s) {
17 |         println!("{}", format_simple(&m, s, "<", ">"));
18 |     } else {
19 |         println!("No match");
20 |     }
21 | }
22 | 
--------------------------------------------------------------------------------
/src/matching.rs:
--------------------------------------------------------------------------------
1 | use std::{cmp::Ordering, slice::Iter};
2 | 
3 | use crate::Scoring;
4 | 
5 | /// A (possibly partial) match of the query within the target string. Matched chars
6 | /// are stored as indices into the target string.
7 | ///
8 | /// The score is not clamped to any range and can be negative.
9 | #[derive(Clone, Debug)]
10 | #[cfg_attr(feature = "serde_support", derive(Serialize, Deserialize))]
11 | pub struct Match {
12 |     /// Accumulative score
13 |     score: isize,
14 |     /// Count of current consecutive matched chars
15 |     consecutive: usize,
16 |     /// Matched char indices
17 |     matched: Vec<usize>,
18 | }
19 | 
20 | impl Match {
21 |     /// Creates a new match with the given score and matched indices.
22 |     pub(crate) fn with_matched(score: isize, consecutive: usize, matched: Vec<usize>) -> Self {
23 |         Match {
24 |             score,
25 |             consecutive,
26 |             matched,
27 |         }
28 |     }
29 | 
30 |     /// Returns the accumulative score for this match.
31 |     pub fn score(&self) -> isize {
32 |         self.score
33 |     }
34 | 
35 |     /// Returns an iterator over the matched char indices.
36 |     pub fn matched_indices(&self) -> Iter<usize> {
37 |         self.matched.iter()
38 |     }
39 | 
40 |     /// Returns an iterator that groups the individual char matches into continuous runs.
41 |     pub fn continuous_matches(&self) -> ContinuousMatches {
42 |         ContinuousMatches {
43 |             matched: &self.matched,
44 |             current: 0,
45 |         }
46 |     }
47 | 
48 |     /// Extends this match with `other`.
49 |     pub fn extend_with(&mut self, other: &Match, scoring: &Scoring) {
50 |         self.score += other.score;
51 |         self.consecutive += other.consecutive;
52 | 
53 |         if let (Some(last), Some(first)) = (self.matched.last(), other.matched.first()) {
54 |             let distance = first - last;
55 | 
56 |             match distance {
57 |                 0 => {}
58 |                 1 => {
59 |                     self.consecutive += 1;
60 |                     self.score += self.consecutive as isize * scoring.bonus_consecutive;
61 |                 }
62 |                 _ => {
63 |                     self.consecutive = 0;
64 |                     let penalty = (distance as isize - 1) * scoring.penalty_distance;
65 |                     self.score -= penalty;
66 |                 }
67 |             }
68 |         }
69 | 
70 |         self.matched.extend(&other.matched);
71 |     }
72 | }
73 | 
74 | impl Ord for Match {
75 |     fn cmp(&self, other: &Match) -> Ordering {
76 |         self.score.cmp(&other.score)
77 |     }
78 | }
79 | 
80 | impl PartialOrd for Match {
81 |     fn partial_cmp(&self, other: &Match) -> Option<Ordering> {
82 |         Some(self.cmp(other))
83 |     }
84 | }
85 | 
86 | impl Eq for Match {}
87 | 
88 | impl PartialEq for Match {
89 |     fn eq(&self, other: &Match) -> bool {
90 |         self.score == other.score
91 |     }
92 | }
93 | 
94 | /// Describes a continuous group of char indices.
95 | #[derive(Debug)]
96 | pub struct ContinuousMatch {
97 |     start: usize,
98 |     len: usize,
99 | }
100 | 
101 | impl ContinuousMatch {
102 |     pub(crate) fn new(start: usize, len: usize) -> Self {
103 |         ContinuousMatch { start, len }
104 |     }
105 | 
106 |     /// Returns the start index of this group.
107 |     pub fn start(&self) -> usize {
108 |         self.start
109 |     }
110 | 
111 |     /// Returns the length of this group.
112 |     pub fn len(&self) -> usize {
113 |         self.len
114 |     }
115 | }
116 | 
117 | impl Eq for ContinuousMatch {}
118 | 
119 | impl PartialEq for ContinuousMatch {
120 |     fn eq(&self, other: &ContinuousMatch) -> bool {
121 |         self.start == other.start && self.len == other.len
122 |     }
123 | }
124 | 
125 | /// Iterator returning [`ContinuousMatch`]es from the matched char indices in a [`Match`].
126 | pub struct ContinuousMatches<'a> {
127 |     matched: &'a Vec<usize>,
128 |     current: usize,
129 | }
130 | 
131 | impl<'a> Iterator for ContinuousMatches<'a> {
132 |     type Item = ContinuousMatch;
133 | 
134 |     fn next(&mut self) -> Option<Self::Item> {
135 |         let mut start = None;
136 |         let mut len = 0;
137 | 
138 |         let mut last_idx = None;
139 | 
140 |         for idx in self.matched.iter().cloned().skip(self.current) {
141 |             start = start.or(Some(idx));
142 | 
143 |             if last_idx.is_some() && (idx - last_idx.unwrap() != 1) {
144 |                 return Some(ContinuousMatch::new(start.unwrap(), len));
145 |             }
146 | 
147 |             self.current += 1;
148 |             len += 1;
149 |             last_idx = Some(idx);
150 |         }
151 | 
152 |         if last_idx.is_some() {
153 |             return Some(ContinuousMatch::new(start.unwrap(), len));
154 |         }
155 | 
156 |         None
157 |     }
158 | }
159 | 
160 | #[cfg(test)]
161 | mod tests {
162 |     use crate::Scoring;
163 | 
164 |     use super::{ContinuousMatch, Match};
165 | 
166 |     #[test]
167 |     fn continuous() {
168 |         let m = Match::with_matched(0, 0, vec![0, 1, 2, 5, 6, 10]);
169 | 
170 |         assert_eq!(
171 |             m.continuous_matches().collect::<Vec<_>>(),
172 |             vec![
173 |                 ContinuousMatch { start: 0, len: 3 },
174 |                 ContinuousMatch { start: 5, len: 2 },
175 |                 ContinuousMatch { start: 10, len: 1 },
176 |             ]
177 |         )
178 |     }
179 | 
180 |     #[test]
181 |     fn extend_match() {
182 |         let mut a = Match::with_matched(16, 3, vec![1, 2, 3]);
183 |         let b = Match::with_matched(8, 3, vec![5, 6, 7]);
184 | 
185 |         let s = Scoring::default();
186 | 
187 |         a.extend_with(&b, &s);
188 | 
189 |         assert_eq!(a.score(), 24 - s.penalty_distance);
190 |         assert_eq!(a.consecutive, 0);
191 |         assert_eq!(a.matched_indices().len(), 6);
192 |     }
193 | 
194 |     #[test]
195 |     fn extend_match_cont() {
196 |         let mut a = Match::with_matched(16, 3, vec![1, 2, 3]);
197 |         let b = Match::with_matched(8, 3, vec![4, 5, 6]);
198 | 
199 |         let s = Scoring::default();
200 | 
201 |         a.extend_with(&b, &s);
202 | 
203 |         assert_eq!(a.score(), 16 + 8 + (3 + 3 + 1) * s.bonus_consecutive);
204 |         assert_eq!(a.consecutive, 3 + 3 + 1);
205 |         assert_eq!(a.matched_indices().len(), 6);
206 |     }
207 | }
208 | 
--------------------------------------------------------------------------------
/src/parsing.rs:
--------------------------------------------------------------------------------
1 | use std::collections::{HashMap, HashSet};
2 | use std::iter::FromIterator;
3 | 
4 | pub type CharSet = HashSet<char>;
5 | pub type Occurrences = HashMap<char, Vec<Occurrence>>;
6 | 
7 | #[derive(Clone, Debug)]
8 | pub struct Occurrence {
9 |     pub target_idx: usize,
10 |     pub is_start: bool,
11 |     pub char: char,
12 | }
13 | 
14 | impl Eq for Occurrence {}
15 | 
16 | impl PartialEq for Occurrence {
17 |     fn eq(&self, other: &Occurrence) -> bool {
18 |         self.target_idx == other.target_idx
19 |             && self.char == other.char
20 |             && self.is_start == other.is_start
21 |     }
22 | }
23 | 
24 | pub fn build_occurrences(query: &QueryChars, string: &str, case_insensitive: bool) -> Occurrences {
25 |     let query_chars = condense(query, case_insensitive);
26 | 
27 |     let mut occurrences = HashMap::new();
28 | 
29 |     let lower = string.to_lowercase();
30 | 
31 |     let mut prev_is_upper = false;
32 |     let mut prev_is_sep = true;
33 |     let mut prev_is_start = false;
34 | 
35 |     for (i, (lower_c, original_c)) in lower.chars().zip(string.chars()).enumerate() {
36 |         let mut is_start = false;
37 |         let is_sep = is_word_sep(original_c);
38 |         let is_upper = original_c.is_uppercase();
39 | 
40 |         let key_char = if case_insensitive {
41 |             lower_c
42 |         } else {
43 |             original_c
44 |         };
45 | 
46 |         if is_sep {
47 |             prev_is_upper = false;
48 |             prev_is_sep = true;
49 |             prev_is_start = false;
50 | 
51 |             if query_chars.contains(&key_char) {
52 |                 occurrences
53 |                     .entry(key_char)
54 |                     .or_insert(Vec::new())
55 |                     .push(Occurrence {
56 |                         char: original_c,
57 |                         target_idx: i,
58 |                         is_start,
59 |                     });
60 |             }
61 | 
62 |             continue;
63 |         }
64 | 
65 |         if prev_is_sep {
66 |             is_start = true;
67 |         } else {
68 |             if !prev_is_start && (prev_is_upper != is_upper) {
69 |                 is_start = true;
70 |             }
71 |         }
72 | 
73 |         if query_chars.contains(&key_char) {
74 |             occurrences
75 |                 .entry(key_char)
76 |                 .or_insert(Vec::new())
77 |                 .push(Occurrence {
78 |                     char: original_c,
79 |                     target_idx: i,
80 |                     is_start,
81 |                 });
82 |         }
83 | 
84 |         prev_is_start = is_start;
85 |         prev_is_sep = is_sep;
86 |         prev_is_upper = is_upper;
87 |     }
88 | 
89 |     occurrences
90 | }
91 | 
92 | fn is_word_sep(c: char) -> bool {
93 |     !c.is_alphanumeric()
94 | }
95 | 
96 | fn condense(s: &QueryChars, case_insensitive: bool) -> CharSet {
97 |     HashSet::from_iter(s.iter().map(|qc| {
98 |         if case_insensitive {
99 |             qc.lower
100 |         } else {
101 |             qc.original
102 |         }
103 |     }))
104 | }
105 | 
106 | pub type QueryChars = Vec<QueryChar>;
107 | 
108 | #[derive(Clone, Debug)]
109 | pub struct QueryChar {
110 |     pub original: char,
111 |     pub lower: char,
112 | }
113 | 
114 | impl Eq for QueryChar {}
115 | 
116 | impl PartialEq for QueryChar {
117 |     fn eq(&self, other: &QueryChar) -> bool {
118 |         self.original == other.original && self.lower == other.lower
119 |     }
120 | }
121 | 
122 | pub fn process_query(query: &str) -> QueryChars {
123 |     let lower_query = query.to_lowercase();
124 | 
125 |     query
126 |         .chars()
127 |         .zip(lower_query.chars())
128 |         .filter_map(|(original, lower)| {
129 |             if original.is_whitespace() {
130 |                 return None;
131 |             }
132 | 
133 |             Some(QueryChar { original, lower })
134 |         })
135 |         .collect::<Vec<_>>()
136 | }
137 | 
138 | #[cfg(test)]
139 | mod tests {
140 |     use std::collections::HashSet;
141 |     use std::iter::FromIterator;
142 | 
143 |     use super::{build_occurrences, condense, is_word_sep, process_query, Occurrence, QueryChar};
144 | 
145 |     #[test]
146 |     fn word_seps() {
147 |         let seps: Vec<char> = vec![
148 |             '/', '\\', '|', '_', '-', ' ', '\t', ':', '.', ',', '~', '>', '<',
149 |         ];
150 | 
151 |         assert!(seps.into_iter().all(|s| is_word_sep(s)));
152 |     }
153 | 
154 |     #[test]
155 |     fn condense_casing() {
156 |         assert_eq!(
157 |             condense(&process_query("SCC"), true),
158 |             HashSet::from_iter(vec!['s', 'c']),
159 |             "Query chars not lowercased"
160 |         );
161 |         assert_eq!(
162 |             condense(&process_query("SCC"), false),
163 |             HashSet::from_iter(vec!['S', 'C']),
164 |             "Query chars not matching original case"
165 |         );
166 |     }
167 | 
168 |     #[test]
169 |     fn query_processing() {
170 |         assert_eq!(
171 |             vec![
172 |                 QueryChar {
173 |                     lower: 'a',
174 |                     original: 'a'
175 |                 },
176 |                 QueryChar {
177 |                     lower: 'b',
178 |                     original: 'b'
179 |                 },
180 |                 QueryChar {
181 |                     lower: 'c',
182 |                     original: 'c'
183 |                 }
184 |             ],
185 |             process_query("a b c"),
186 |             "Whitespace not removed"
187 |         );
188 | 
189 |         assert_eq!(
190 |             vec![
191 |                 QueryChar {
192 |                     lower: 'a',
193 |                     original: 'A'
194 |                 },
195 |                 QueryChar {
196 |                     lower: 'b',
197 |                     original: 'B'
198 |                 },
199 |                 QueryChar {
200 |                     lower: 'c',
201 | original: 'C' 202 | } 203 | ], 204 | process_query("ABC") 205 | ); 206 | } 207 | 208 | #[test] 209 | fn occurrence_eq() { 210 | let a = Occurrence { 211 | char: 'c', 212 | target_idx: 0, 213 | is_start: true, 214 | }; 215 | 216 | assert_eq!( 217 | a, 218 | Occurrence { 219 | char: 'c', 220 | target_idx: 0, 221 | is_start: true 222 | } 223 | ); 224 | assert_ne!( 225 | a, 226 | Occurrence { 227 | char: 'c', 228 | target_idx: 0, 229 | is_start: false 230 | }, 231 | "is_start differs but eq" 232 | ); 233 | assert_ne!( 234 | a, 235 | Occurrence { 236 | char: 'c', 237 | target_idx: 1, 238 | is_start: true 239 | }, 240 | "target_idx differs but eq" 241 | ); 242 | 243 | assert_ne!( 244 | a, 245 | Occurrence { 246 | char: 'b', 247 | target_idx: 0, 248 | is_start: true 249 | }, 250 | "char differs but eq" 251 | ); 252 | } 253 | 254 | #[test] 255 | fn occurrences() { 256 | let t = "SoccerCartoonController"; 257 | 258 | let mut occs = build_occurrences(&process_query("scc"), t, true); 259 | 260 | assert_eq!(occs.len(), 2); 261 | 262 | let s = occs.remove(&'s').expect("Missing s occurrences"); 263 | 264 | assert_eq!( 265 | s, 266 | vec![Occurrence { 267 | char: 'S', 268 | target_idx: 0, 269 | is_start: true, 270 | }] 271 | ); 272 | 273 | let c = occs.remove(&'c').expect("Missing c occurrences"); 274 | 275 | assert_eq!( 276 | c, 277 | vec![ 278 | Occurrence { 279 | char: 'c', 280 | target_idx: 2, 281 | is_start: false, 282 | }, 283 | Occurrence { 284 | char: 'c', 285 | target_idx: 3, 286 | is_start: false, 287 | }, 288 | Occurrence { 289 | char: 'C', 290 | target_idx: 6, 291 | is_start: true, 292 | }, 293 | Occurrence { 294 | char: 'C', 295 | target_idx: 13, 296 | is_start: true, 297 | }, 298 | ] 299 | ); 300 | } 301 | 302 | #[test] 303 | fn occurrences_2() { 304 | let t = "SccsCoolController"; 305 | 306 | let mut occs = build_occurrences(&process_query("scc"), t, true); 307 | 308 | assert_eq!(occs.len(), 2); 309 | 310 | let s = occs.remove(&'s').expect("Missing s occurrences"); 311 | 312 | assert_eq!( 313 | s, 314 | vec![ 315 | Occurrence { 316 | char: 'S', 317 | target_idx: 0, 318 | is_start: true, 319 | }, 320 | Occurrence { 321 | char: 's', 322 | target_idx: 3, 323 | is_start: false, 324 | } 325 | ] 326 | ); 327 | 328 | let c = occs.remove(&'c').expect("Missing c occurrences"); 329 | 330 | assert_eq!( 331 | c, 332 | vec![ 333 | Occurrence { 334 | char: 'c', 335 | target_idx: 1, 336 | is_start: false, 337 | }, 338 | Occurrence { 339 | char: 'c', 340 | target_idx: 2, 341 | is_start: false, 342 | }, 343 | Occurrence { 344 | char: 'C', 345 | target_idx: 4, 346 | is_start: true, 347 | }, 348 | Occurrence { 349 | char: 'C', 350 | target_idx: 8, 351 | is_start: true, 352 | }, 353 | ] 354 | ); 355 | } 356 | } 357 | -------------------------------------------------------------------------------- /src/scoring.rs: -------------------------------------------------------------------------------- 1 | pub static DEFAULT_SCORING: Scoring = Scoring { 2 | bonus_consecutive: 8, 3 | bonus_word_start: 72, 4 | bonus_match_case: 8, 5 | penalty_distance: 4, 6 | }; 7 | 8 | /// Bonuses/penalties used for scoring a [`Match`](crate::matching::Match). 9 | #[derive(Clone, Debug)] 10 | #[cfg_attr(feature = "serde_support", derive(Serialize, Deserialize))] 11 | pub struct Scoring { 12 | /// `current_consecutive_count * bonus_consecutive` will be added for every 13 | /// consecutive char match. 14 | /// 15 | /// `1 * bonus` for the first consecutive match, `2 * bonus` for 16 | /// the second, etc. 
17 | pub bonus_consecutive: isize, 18 | /// Added when a query char matches a word start. 19 | pub bonus_word_start: isize, 20 | /// Added when the matched query char also matches the case of the target char. 21 | /// 22 | /// Only applied if the search is case insensitive. 23 | pub bonus_match_case: isize, 24 | /// Subtracted from the score for every char between two matches. 25 | pub penalty_distance: isize, 26 | } 27 | 28 | impl Scoring { 29 | /// Creates a new configuration with the given bonuses/penalties. 30 | pub fn new( 31 | bonus_consecutive: isize, 32 | bonus_word_start: isize, 33 | bonus_match_case: isize, 34 | penalty_distance: isize, 35 | ) -> Self { 36 | Scoring { 37 | bonus_consecutive, 38 | bonus_word_start, 39 | bonus_match_case, 40 | penalty_distance, 41 | } 42 | } 43 | 44 | /// Creates a configuration that emphasizes matching word starts (this is also the default). 45 | pub fn emphasize_word_starts() -> Self { 46 | Self::default() 47 | } 48 | 49 | /// Creates a configuration that emphasizes short distances between matched chars. 50 | pub fn emphasize_distance() -> Self { 51 | Scoring::new(12, 24, 8, 8) 52 | } 53 | } 54 | 55 | impl Default for Scoring { 56 | /// Creates a default configuration, see [`Scoring::emphasize_word_starts`]. 57 | fn default() -> Self { 58 | DEFAULT_SCORING.clone() 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/search.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use matching::Match; 4 | use parsing::Occurrences; 5 | use scoring::Scoring; 6 | 7 | use crate::{ 8 | parsing::{build_occurrences, process_query, Occurrence, QueryChar, QueryChars}, 9 | scoring::DEFAULT_SCORING, 10 | }; 11 | 12 | /// Describes a fuzzy search. Alternative to [`best_match`](crate::best_match) which allows for more configuration. 13 | /// 14 | /// # Examples 15 | /// 16 | /// Basic usage: 17 | /// 18 | /// ```rust 19 | /// use sublime_fuzzy::{FuzzySearch, Scoring}; 20 | /// 21 | /// let scoring = Scoring::emphasize_word_starts(); 22 | /// 23 | /// let result = FuzzySearch::new("something", "Some Search Thing") 24 | /// .score_with(&scoring) 25 | /// .case_insensitive() 26 | /// .best_match(); 27 | /// 28 | /// assert!(result.is_some()); 29 | /// ``` 30 | pub struct FuzzySearch<'a> { 31 | query: &'a str, 32 | target: &'a str, 33 | scoring: Option<&'a Scoring>, 34 | case_insensitive: bool, 35 | } 36 | 37 | impl<'a> FuzzySearch<'a> { 38 | /// Creates a new search to match `query` in `target`. 39 | /// 40 | /// Note that whitespace in query will be _ignored_. 41 | pub fn new(query: &'a str, target: &'a str) -> Self { 42 | FuzzySearch { 43 | query, 44 | target, 45 | scoring: None, 46 | case_insensitive: true, 47 | } 48 | } 49 | 50 | /// Use custom scoring values. 51 | /// 52 | /// If not specified will use `Scoring::default()`. 53 | pub fn score_with(mut self, scoring: &'a Scoring) -> Self { 54 | self.scoring = Some(scoring); 55 | 56 | self 57 | } 58 | 59 | /// Only match query chars in the target string if case matches. 60 | /// 61 | /// [`Scoring::bonus_match_case`] will not be applied if this is set (because a char match will 62 | /// always also be a case match). 63 | pub fn case_sensitive(mut self) -> Self { 64 | self.case_insensitive = false; 65 | 66 | self 67 | } 68 | 69 | /// Ignore case when matching query chars in the target string. 
70 |     ///
71 |     /// If not only the char but also the case matches, [`Scoring::bonus_match_case`] will be added to
72 |     /// the score. If that behavior is not wanted, the bonus can be set to 0 with custom scoring.
73 |     pub fn case_insensitive(mut self) -> Self {
74 |         self.case_insensitive = true;
75 | 
76 |         self
77 |     }
78 | 
79 |     /// Finds the best match of the query in the target string.
80 |     ///
81 |     /// Always tries to match the _full_ pattern. A partial match is considered
82 |     /// invalid and will return [`None`]. Will also return [`None`] in case the query or
83 |     /// target string are empty.
84 |     pub fn best_match(self) -> Option<Match> {
85 |         let processed_query = process_query(self.query);
86 | 
87 |         if processed_query.len() == 0 || self.target.len() == 0 {
88 |             return None;
89 |         }
90 | 
91 |         let occurrences = build_occurrences(&processed_query, self.target, self.case_insensitive);
92 | 
93 |         let searcher = FuzzySearcher::new(
94 |             processed_query,
95 |             self.scoring.unwrap_or(&DEFAULT_SCORING),
96 |             self.case_insensitive,
97 |         );
98 | 
99 |         searcher.best_match(&occurrences)
100 |     }
101 | }
102 | 
103 | struct FuzzySearcher<'a> {
104 |     query: QueryChars,
105 |     scoring: &'a Scoring,
106 |     match_cache: HashMap<(usize, usize, usize), Option<Match>>,
107 |     case_insensitive: bool,
108 | }
109 | 
110 | impl<'a> FuzzySearcher<'a> {
111 |     fn new(query: QueryChars, scoring: &'a Scoring, case_insensitive: bool) -> Self {
112 |         FuzzySearcher {
113 |             match_cache: HashMap::with_capacity(query.len() * query.len()),
114 |             query,
115 |             scoring,
116 |             case_insensitive,
117 |         }
118 |     }
119 | 
120 |     #[inline(always)]
121 |     fn queried_char(&self, qc: &QueryChar) -> char {
122 |         if self.case_insensitive {
123 |             qc.lower
124 |         } else {
125 |             qc.original
126 |         }
127 |     }
128 | 
129 |     #[inline(always)]
130 |     fn case_bonus(&self, query_idx: usize, occurrence: &Occurrence) -> isize {
131 |         if self.case_insensitive {
132 |             self.query
133 |                 .get(query_idx)
134 |                 .map_or(0, |c| (c.original == occurrence.char) as isize)
135 |                 * self.scoring.bonus_match_case
136 |         } else {
137 |             0
138 |         }
139 |     }
140 | 
141 |     fn best_match(mut self, occurrences: &Occurrences) -> Option<Match> {
142 |         let qc = self.query.get(0)?;
143 | 
144 |         occurrences
145 |             .get(&self.queried_char(qc))?
146 |             .iter()
147 |             .filter_map(|o| self.match_(1, o, 0, &occurrences))
148 |             .max()
149 |     }
150 | 
151 |     fn match_(
152 |         &mut self,
153 |         query_idx: usize,
154 |         occurrence: &Occurrence,
155 |         consecutive: usize,
156 |         occurrences: &Occurrences,
157 |     ) -> Option<Match> {
158 |         let this_key = (query_idx, occurrence.target_idx, consecutive);
159 | 
160 |         // Already scored sub-tree
161 |         if let Some(cached) = self.match_cache.get(&this_key) {
162 |             return cached.clone();
163 |         }
164 | 
165 |         let next_char = self.query.get(query_idx);
166 | 
167 |         let score = consecutive as isize * self.scoring.bonus_consecutive
168 |             + occurrence.is_start as isize * self.scoring.bonus_word_start
169 |             + self.case_bonus(query_idx - 1, occurrence);
170 | 
171 |         let mut this_match = Match::with_matched(score, consecutive, vec![occurrence.target_idx]);
172 | 
173 |         // Successfully matched all query chars
174 |         if next_char.is_none() {
175 |             self.match_cache.insert(this_key, Some(this_match.clone()));
176 | 
177 |             return Some(this_match);
178 |         }
179 | 
180 |         let occs = occurrences.get(&self.queried_char(next_char.unwrap()));
181 | 
182 |         // Reached end of target without matching all query chars
183 |         if occs.is_none() {
184 |             self.match_cache.insert(this_key, None);
185 | 
186 |             return None;
187 |         }
188 | 
189 |         let best_match = occs
190 |             .unwrap()
191 |             .iter()
192 |             .filter(|&o| o.target_idx > occurrence.target_idx)
193 |             .filter_map(|o| {
194 |                 let distance = o.target_idx - occurrence.target_idx;
195 | 
196 |                 let new_consecutive = if distance == 1 { consecutive + 1 } else { 0 };
197 | 
198 |                 self.match_(query_idx + 1, o, new_consecutive, occurrences)
199 |             })
200 |             .max()
201 |             .and_then(|m| {
202 |                 this_match.extend_with(&m, &self.scoring);
203 | 
204 |                 Some(this_match)
205 |             });
206 | 
207 |         self.match_cache.insert(this_key, best_match.clone());
208 | 
209 |         best_match
210 |     }
211 | }
212 | 
--------------------------------------------------------------------------------