├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── README.md ├── benches ├── common.rs ├── fzf_common.rs ├── fzf_v1.rs └── fzf_v2.rs ├── examples └── cities.rs ├── fuzz ├── .gitignore ├── Cargo.toml └── fuzz_targets │ └── fzf.rs ├── rustfmt.toml ├── src ├── candidate.rs ├── case_sensitivity.rs ├── lib.rs ├── matched_ranges.rs ├── metric.rs ├── metrics │ ├── fzf │ │ ├── candidate.rs │ │ ├── distance.rs │ │ ├── fzf.rs │ │ ├── fzf_v1.rs │ │ ├── fzf_v2.rs │ │ ├── mod.rs │ │ ├── parser.rs │ │ ├── query.rs │ │ ├── scheme.rs │ │ ├── scoring.rs │ │ └── slab.rs │ └── mod.rs ├── normalize.rs └── utils.rs └── tests ├── fzf_common.rs ├── fzf_v1.rs └── fzf_v2.rs /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | schedule: 11 | - cron: '0 0 1 * *' 12 | 13 | jobs: 14 | test: 15 | name: test 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v2 19 | - uses: dtolnay/rust-toolchain@nightly 20 | - run: cargo test --all-features --no-fail-fast 21 | 22 | bench: 23 | name: bench 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/checkout@v2 27 | - uses: dtolnay/rust-toolchain@nightly 28 | - run: cargo bench --all-features --no-run 29 | 30 | clippy-all-features: 31 | name: clippy-all-features 32 | runs-on: ubuntu-latest 33 | steps: 34 | - uses: actions/checkout@v2 35 | - uses: dtolnay/rust-toolchain@nightly 36 | with: 37 | components: clippy 38 | - run: cargo clippy --all-features --all-targets -- -D warnings 39 | 40 | clippy-no-features: 41 | name: clippy-no-features 42 | runs-on: ubuntu-latest 43 | steps: 44 | - uses: actions/checkout@v2 45 | - uses: dtolnay/rust-toolchain@nightly 46 | with: 47 | components: clippy 48 | - run: cargo clippy --all-targets -- -D warnings 49 | 50 | docs: 51 | name: docs 52 | runs-on: ubuntu-latest 53 | steps: 54 | - 
uses: actions/checkout@v2 55 | - uses: dtolnay/rust-toolchain@nightly 56 | - run: RUSTDOCFLAGS="--cfg docsrs" cargo doc --all-features 57 | 58 | format: 59 | name: format 60 | runs-on: ubuntu-latest 61 | steps: 62 | - uses: actions/checkout@v2 63 | - uses: dtolnay/rust-toolchain@nightly 64 | with: 65 | components: rustfmt 66 | - run: cargo fmt --check 67 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /Cargo.lock 3 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [Unreleased] 4 | 5 | ## [0.1.1] - Dec 2 2023 6 | 7 | Fixed the CI pipeline of the previous release. 8 | 9 | ## [0.1.0] - Dec 2 2023 10 | 11 | Initial public release. 12 | 13 | [Unreleased]: https://github.com/nomad/norm/compare/v0.1.1...HEAD 14 | [0.1.1]: https://github.com/nomad/norm/tree/v0.1.1 15 | [0.1.0]: https://github.com/nomad/norm/tree/v0.1.0 16 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "norm" 3 | version = "0.1.1" 4 | edition = "2021" 5 | authors = ["Riccardo Mazzarini "] 6 | description = "A collection of distance metrics on strings" 7 | documentation = "https://docs.rs/norm" 8 | repository = "https://github.com/nomad/norm" 9 | readme = "README.md" 10 | license = "MIT" 11 | keywords = ["text", "fuzzy", "match", "search", "fzf"] 12 | categories = ["algorithms"] 13 | exclude = ["/.github", "/fuzz", "CHANGELOG.md"] 14 | 15 | [package.metadata.docs.rs] 16 | features = ["fzf-v1", "fzf-v2"] 17 | rustdoc-args = ["--cfg", "docsrs"] 18 | 19 | [features] 20 | fzf-v1 = ["__any-metric"] 21 | fzf-v2 = ["__any-metric"] 22 | 23 | # Private features. 
24 | __any-metric = [] 25 | __benches = [] 26 | __into-score = [] 27 | __tests = ["fzf-v1", "fzf-v2"] 28 | 29 | [dependencies] 30 | memchr = "2" 31 | 32 | [dev-dependencies] 33 | criterion = "0.5" 34 | 35 | [[test]] 36 | name = "fzf_common" 37 | required-features = ["__tests"] 38 | 39 | [[test]] 40 | name = "fzf_v1" 41 | required-features = ["__tests"] 42 | 43 | [[test]] 44 | name = "fzf_v2" 45 | required-features = ["__tests"] 46 | 47 | [[bench]] 48 | name = "fzf_common" 49 | harness = false 50 | required-features = ["__benches"] 51 | 52 | [[bench]] 53 | name = "fzf_v1" 54 | harness = false 55 | required-features = ["__benches"] 56 | 57 | [[bench]] 58 | name = "fzf_v2" 59 | harness = false 60 | required-features = ["__benches"] 61 | 62 | [[example]] 63 | name = "cities" 64 | required-features = ["fzf-v2"] 65 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Riccardo Mazzarini 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 📐 norm 2 | 3 | [![Latest version]](https://crates.io/crates/norm) 4 | [![Docs badge]](https://docs.rs/norm) 5 | [![CI]](https://github.com/nomad/norm/actions) 6 | 7 | [Latest version]: https://img.shields.io/crates/v/norm.svg 8 | [Docs badge]: https://docs.rs/norm/badge.svg 9 | [CI]: https://github.com/nomad/norm/actions/workflows/ci.yml/badge.svg 10 | 11 | norm is a collection of different distance metrics on strings. This problem is 12 | sometimes referred to as "string similarity search", or more colloquially 13 | "fuzzy matching". 14 | 15 | ## Available metrics 16 | 17 | - `FzfV1`: port of the algorithm used by fzf when launching with `--algo=v1`; 18 | - `FzfV2`: port of the algorithm used by fzf when launching without any extra 19 | flags or with `--algo=v2`; 20 | 21 | ## Performance 22 | 23 | Performance is a top priority for this crate. Our goal is to have the fastest 24 | implementation of every metric algorithm we provide, across all languages. 25 | [Here][bench] you can find a number of benchmarks comparing norm's metrics to 26 | each other, as well as to other popular libraries. 
27 | 28 | ## Example usage 29 | 30 | ```rust 31 | use std::ops::Range; 32 | 33 | use norm::fzf::{FzfParser, FzfV2}; 34 | use norm::Metric; 35 | 36 | let mut fzf = FzfV2::new(); 37 | 38 | let mut parser = FzfParser::new(); 39 | 40 | let query = parser.parse("aa"); 41 | 42 | let cities = ["Geneva", "Ulaanbaatar", "New York City", "Adelaide"]; 43 | 44 | let mut results = cities 45 | .iter() 46 | .copied() 47 | .filter_map(|city| fzf.distance(query, city).map(|dist| (city, dist))) 48 | .collect::<Vec<_>>(); 49 | 50 | // We sort the results by distance in ascending order, so that the best match 51 | // will be at the front of the vector. 52 | results.sort_by_key(|(_city, dist)| *dist); 53 | 54 | assert_eq!(results.len(), 2); 55 | assert_eq!(results[0].0, "Adelaide"); 56 | assert_eq!(results[1].0, "Ulaanbaatar"); 57 | 58 | // We can also find out which sub-strings of each candidate matched the query. 59 | 60 | let mut ranges: Vec<Range<usize>> = Vec::new(); 61 | 62 | let _ = fzf.distance_and_ranges(query, results[0].0, &mut ranges); 63 | assert_eq!(ranges.len(), 2); 64 | assert_eq!(ranges[0], 0..1); // "A" in "Adelaide" 65 | assert_eq!(ranges[1], 4..5); // "a" in "Adelaide" 66 | 67 | ranges.clear(); 68 | 69 | let _ = fzf.distance_and_ranges(query, results[1].0, &mut ranges); 70 | assert_eq!(ranges.len(), 1); 71 | assert_eq!(ranges[0], 2..4); // The first "aa" in "Ulaanbaatar" 72 | ``` 73 | 74 | [bench]: https://github.com/noib3/fuzzy-benches 75 | -------------------------------------------------------------------------------- /benches/common.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | 3 | use core::ops::Range; 4 | 5 | use criterion::{ 6 | measurement::WallTime, 7 | BenchmarkGroup, 8 | BenchmarkId, 9 | Throughput, 10 | }; 11 | use norm::CaseSensitivity; 12 | 13 | pub trait Metric { 14 | type Query<'a>: Copy; 15 | 16 | type Parser: Parser; 17 | 18 | fn dist(&mut self, query: Self::Query<'_>, candidate: &str); 19 | 20 | fn 
dist_and_ranges( 21 | &mut self, 22 | query: Self::Query<'_>, 23 | candidate: &str, 24 | ranges: &mut Vec>, 25 | ); 26 | 27 | fn set_case_sensitivity( 28 | &mut self, 29 | case_sensitivity: CaseSensitivity, 30 | ) -> &mut Self; 31 | } 32 | 33 | pub trait Parser: Default { 34 | fn parse<'a>(&'a mut self, s: &str) -> M::Query<'a>; 35 | } 36 | 37 | // TODO: docs 38 | fn param( 39 | case: CaseSensitivity, 40 | with_ranges: bool, 41 | suffix: Option<&str>, 42 | ) -> String { 43 | let mut s = String::new(); 44 | 45 | let case = match case { 46 | CaseSensitivity::Sensitive => "case_sensitive", 47 | CaseSensitivity::Insensitive => "case_insensitive", 48 | CaseSensitivity::Smart => "case_smart", 49 | }; 50 | 51 | s.push_str(case); 52 | 53 | let ranges = if with_ranges { "_with_ranges" } else { "" }; 54 | 55 | s.push_str(ranges); 56 | 57 | if let Some(suffix) = suffix { 58 | s.push('_'); 59 | s.push_str(suffix); 60 | } 61 | 62 | s 63 | } 64 | 65 | // TODO: docs 66 | fn for_all_cases_and_ranges( 67 | mut metric: M, 68 | function: &str, 69 | suffix: Option<&str>, 70 | mut fun: F, 71 | ) where 72 | M: Metric, 73 | F: FnMut(&mut M, BenchmarkId, Option<&mut Vec>>), 74 | { 75 | for case in [ 76 | CaseSensitivity::Sensitive, 77 | CaseSensitivity::Insensitive, 78 | CaseSensitivity::Smart, 79 | ] { 80 | for with_ranges in [true, false] { 81 | metric.set_case_sensitivity(case); 82 | let param = param(case, with_ranges, suffix); 83 | let mut ranges = with_ranges.then(Vec::new); 84 | fun( 85 | &mut metric, 86 | BenchmarkId::new(function, param), 87 | ranges.as_mut(), 88 | ); 89 | } 90 | } 91 | } 92 | 93 | // TODO: docs 94 | fn bench<'a, M, C>( 95 | group: &mut BenchmarkGroup, 96 | id: BenchmarkId, 97 | metric: &mut M, 98 | query: &str, 99 | candidates: C, 100 | ranges: Option<&mut Vec>>, 101 | ) where 102 | M: Metric, 103 | C: IntoIterator, 104 | C::IntoIter: ExactSizeIterator + Clone, 105 | { 106 | let mut parser = M::Parser::default(); 107 | 108 | let query = parser.parse(query); 109 
| 110 | let candidates = candidates.into_iter(); 111 | 112 | group.throughput(Throughput::Elements(candidates.len() as u64)); 113 | 114 | if let Some(ranges) = ranges { 115 | group.bench_function(id, |b| { 116 | b.iter(|| { 117 | for candidate in candidates.clone() { 118 | metric.dist_and_ranges(query, candidate, ranges); 119 | } 120 | }) 121 | }); 122 | } else { 123 | group.bench_function(id, |b| { 124 | b.iter(|| { 125 | for candidate in candidates.clone() { 126 | metric.dist(query, candidate); 127 | } 128 | }) 129 | }); 130 | } 131 | } 132 | 133 | pub const MEDIUM_TEXT: &str = 134 | "Far far away, behind the word mountains, far from the countries Vokalia \ 135 | and Consonantia, there live the blind texts. Separated they live in \ 136 | Bookmarksgrove right at the coast of the Semantics, a large."; 137 | 138 | pub const LONG_TEXT: &str = 139 | "Far far away, behind the word mountains, far from the countries Vokalia \ 140 | and Consonantia, there live the blind texts. Separated they live in \ 141 | Bookmarksgrove right at the coast of the Semantics, a large language \ 142 | ocean. A small river named Duden flows by their place and supplies it \ 143 | with the necessary regelialia. It is a paradisematic country, in which \ 144 | roasted parts of sentences fly into your mouth. 
Even the all-powerful \ 145 | Pointing has no control about the blind texts it is an almost \ 146 | unorthographic life"; 147 | 148 | // TODO: docs 149 | pub fn short( 150 | metric: M, 151 | suffix: Option<&str>, 152 | mut group: BenchmarkGroup, 153 | ) { 154 | for_all_cases_and_ranges(metric, "short", suffix, |metric, id, ranges| { 155 | let query = "paradise"; 156 | let candidates = core::iter::once("paradisematic"); 157 | bench(&mut group, id, metric, query, candidates, ranges); 158 | }) 159 | } 160 | 161 | // TODO: docs 162 | pub fn medium_start( 163 | metric: M, 164 | suffix: Option<&str>, 165 | mut group: BenchmarkGroup, 166 | ) { 167 | for_all_cases_and_ranges( 168 | metric, 169 | "medium_start", 170 | suffix, 171 | |metric, id, ranges| { 172 | let query = "away"; 173 | let candidates = core::iter::once(MEDIUM_TEXT); 174 | bench(&mut group, id, metric, query, candidates, ranges); 175 | }, 176 | ) 177 | } 178 | 179 | // TODO: docs 180 | pub fn medium_middle( 181 | metric: M, 182 | suffix: Option<&str>, 183 | mut group: BenchmarkGroup, 184 | ) { 185 | for_all_cases_and_ranges( 186 | metric, 187 | "medium_middle", 188 | suffix, 189 | |metric, id, ranges| { 190 | let query = "blind"; 191 | let candidates = core::iter::once(MEDIUM_TEXT); 192 | bench(&mut group, id, metric, query, candidates, ranges); 193 | }, 194 | ) 195 | } 196 | 197 | // TODO: docs 198 | pub fn medium_end( 199 | metric: M, 200 | suffix: Option<&str>, 201 | mut group: BenchmarkGroup, 202 | ) { 203 | for_all_cases_and_ranges( 204 | metric, 205 | "medium_end", 206 | suffix, 207 | |metric, id, ranges| { 208 | let query = "Semantics"; 209 | let candidates = core::iter::once(MEDIUM_TEXT); 210 | bench(&mut group, id, metric, query, candidates, ranges); 211 | }, 212 | ) 213 | } 214 | 215 | // TODO: docs 216 | pub fn long_start( 217 | metric: M, 218 | suffix: Option<&str>, 219 | mut group: BenchmarkGroup, 220 | ) { 221 | for_all_cases_and_ranges( 222 | metric, 223 | "long_start", 224 | suffix, 225 | 
|metric, id, ranges| { 226 | let query = "mountains"; 227 | let candidates = core::iter::once(LONG_TEXT); 228 | bench(&mut group, id, metric, query, candidates, ranges); 229 | }, 230 | ) 231 | } 232 | 233 | // TODO: docs 234 | pub fn long_middle( 235 | metric: M, 236 | suffix: Option<&str>, 237 | mut group: BenchmarkGroup, 238 | ) { 239 | for_all_cases_and_ranges( 240 | metric, 241 | "long_middle", 242 | suffix, 243 | |metric, id, ranges| { 244 | let query = "Duden"; 245 | let candidates = core::iter::once(LONG_TEXT); 246 | bench(&mut group, id, metric, query, candidates, ranges); 247 | }, 248 | ) 249 | } 250 | 251 | // TODO: docs 252 | pub fn long_end( 253 | metric: M, 254 | suffix: Option<&str>, 255 | mut group: BenchmarkGroup, 256 | ) { 257 | for_all_cases_and_ranges( 258 | metric, 259 | "long_end", 260 | suffix, 261 | |metric, id, ranges| { 262 | let query = "unorthographic"; 263 | let candidates = core::iter::once(LONG_TEXT); 264 | bench(&mut group, id, metric, query, candidates, ranges); 265 | }, 266 | ) 267 | } 268 | -------------------------------------------------------------------------------- /benches/fzf_common.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use criterion::{ 4 | criterion_group, 5 | criterion_main, 6 | measurement::WallTime, 7 | BenchmarkGroup, 8 | Criterion, 9 | Throughput, 10 | }; 11 | use norm::{ 12 | fzf::{FzfParser, FzfV1}, 13 | Metric, 14 | }; 15 | 16 | /// The char length of the queries. 17 | const QUERY_LEN: usize = 16; 18 | 19 | fn bench( 20 | group: &mut BenchmarkGroup, 21 | query: &str, 22 | candidate: &str, 23 | bench_name: &str, 24 | ) { 25 | // Using V1 or V2 doesn't matter because we're not doing fuzzy matching 26 | // here. 
27 | let mut fzf = FzfV1::new(); 28 | 29 | let mut parser = FzfParser::new(); 30 | 31 | let query = parser.parse(query); 32 | 33 | group.throughput(Throughput::Elements(1)); 34 | 35 | group.bench_function(bench_name, |b| { 36 | b.iter(|| { 37 | let _ = fzf.distance(query, candidate); 38 | }) 39 | }); 40 | } 41 | 42 | fn group(c: &mut Criterion) -> BenchmarkGroup { 43 | c.benchmark_group("fzf") 44 | } 45 | 46 | fn exact(c: &mut Criterion) { 47 | let mut group = group(c); 48 | 49 | let candidate = common::MEDIUM_TEXT; 50 | 51 | let query = { 52 | let midpoint = candidate.len() / 2; 53 | let start = midpoint - QUERY_LEN / 2; 54 | let end = start + QUERY_LEN; 55 | &candidate[start..end] 56 | }; 57 | 58 | let query = format!("'{query}"); 59 | 60 | bench(&mut group, &query, candidate, "exact"); 61 | } 62 | 63 | fn prefix(c: &mut Criterion) { 64 | let mut group = group(c); 65 | 66 | let candidate = common::MEDIUM_TEXT; 67 | 68 | let query = format!("^{query}", query = &candidate[..QUERY_LEN]); 69 | 70 | bench(&mut group, &query, candidate, "prefix"); 71 | } 72 | 73 | fn suffix(c: &mut Criterion) { 74 | let mut group = group(c); 75 | 76 | let candidate = common::MEDIUM_TEXT; 77 | 78 | let query = 79 | format!("{query}$", query = &candidate[candidate.len() - QUERY_LEN..]); 80 | 81 | bench(&mut group, &query, candidate, "suffix"); 82 | } 83 | 84 | fn equal(c: &mut Criterion) { 85 | let mut group = group(c); 86 | 87 | let candidate = &common::MEDIUM_TEXT[..QUERY_LEN]; 88 | 89 | let query = format!("^{candidate}$"); 90 | 91 | bench(&mut group, &query, candidate, "equal"); 92 | } 93 | 94 | criterion_group!(benches, exact, prefix, suffix, equal); 95 | criterion_main!(benches); 96 | -------------------------------------------------------------------------------- /benches/fzf_v1.rs: -------------------------------------------------------------------------------- 1 | use core::ops::Range; 2 | 3 | mod common; 4 | 5 | use common as bench; 6 | use criterion::{ 7 | criterion_group, 8 | 
criterion_main, 9 | measurement::WallTime, 10 | BenchmarkGroup, 11 | Criterion, 12 | }; 13 | use norm::{ 14 | fzf::{FzfParser, FzfQuery, FzfV1}, 15 | CaseSensitivity, 16 | Metric, 17 | }; 18 | 19 | impl bench::Parser for FzfParser { 20 | fn parse<'a>(&'a mut self, s: &str) -> FzfQuery<'a> { 21 | self.parse(s) 22 | } 23 | } 24 | 25 | impl bench::Metric for FzfV1 { 26 | type Query<'a> = FzfQuery<'a>; 27 | 28 | type Parser = FzfParser; 29 | 30 | #[inline] 31 | fn dist(&mut self, query: FzfQuery, candidate: &str) { 32 | self.distance(query, candidate); 33 | } 34 | #[inline(always)] 35 | fn dist_and_ranges( 36 | &mut self, 37 | query: FzfQuery, 38 | candidate: &str, 39 | ranges: &mut Vec>, 40 | ) { 41 | self.distance_and_ranges(query, candidate, ranges); 42 | } 43 | fn set_case_sensitivity( 44 | &mut self, 45 | case_sensitivity: CaseSensitivity, 46 | ) -> &mut Self { 47 | self.set_case_sensitivity(case_sensitivity) 48 | } 49 | } 50 | 51 | fn group(c: &mut Criterion) -> BenchmarkGroup { 52 | c.benchmark_group("fzf_v1") 53 | } 54 | 55 | fn short(c: &mut Criterion) { 56 | bench::short(FzfV1::new(), None, group(c)); 57 | } 58 | 59 | fn medium_start(c: &mut Criterion) { 60 | bench::medium_start(FzfV1::new(), None, group(c)); 61 | } 62 | 63 | fn medium_middle(c: &mut Criterion) { 64 | bench::medium_middle(FzfV1::new(), None, group(c)); 65 | } 66 | 67 | fn medium_end(c: &mut Criterion) { 68 | bench::medium_end(FzfV1::new(), None, group(c)); 69 | } 70 | 71 | fn long_start(c: &mut Criterion) { 72 | bench::long_start(FzfV1::new(), None, group(c)); 73 | } 74 | 75 | fn long_middle(c: &mut Criterion) { 76 | bench::long_middle(FzfV1::new(), None, group(c)); 77 | } 78 | 79 | fn long_end(c: &mut Criterion) { 80 | bench::long_end(FzfV1::new(), None, group(c)); 81 | } 82 | 83 | criterion_group!( 84 | benches, 85 | short, 86 | medium_start, 87 | medium_middle, 88 | medium_end, 89 | long_start, 90 | long_middle, 91 | long_end, 92 | ); 93 | criterion_main!(benches); 94 | 
-------------------------------------------------------------------------------- /benches/fzf_v2.rs: -------------------------------------------------------------------------------- 1 | use core::ops::Range; 2 | 3 | mod common; 4 | 5 | use common as bench; 6 | use criterion::{ 7 | criterion_group, 8 | criterion_main, 9 | measurement::WallTime, 10 | BenchmarkGroup, 11 | Criterion, 12 | }; 13 | use norm::{ 14 | fzf::{FzfParser, FzfQuery, FzfV2}, 15 | CaseSensitivity, 16 | Metric, 17 | }; 18 | 19 | impl bench::Parser for FzfParser { 20 | fn parse<'a>(&'a mut self, s: &str) -> FzfQuery<'a> { 21 | self.parse(s) 22 | } 23 | } 24 | 25 | impl bench::Metric for FzfV2 { 26 | type Query<'a> = FzfQuery<'a>; 27 | 28 | type Parser = FzfParser; 29 | 30 | #[inline(always)] 31 | fn dist(&mut self, query: FzfQuery, candidate: &str) { 32 | self.distance(query, candidate); 33 | } 34 | #[inline(always)] 35 | fn dist_and_ranges( 36 | &mut self, 37 | query: FzfQuery, 38 | candidate: &str, 39 | ranges: &mut Vec>, 40 | ) { 41 | self.distance_and_ranges(query, candidate, ranges); 42 | } 43 | fn set_case_sensitivity( 44 | &mut self, 45 | case_sensitivity: CaseSensitivity, 46 | ) -> &mut Self { 47 | self.set_case_sensitivity(case_sensitivity) 48 | } 49 | } 50 | 51 | fn group(c: &mut Criterion) -> BenchmarkGroup { 52 | c.benchmark_group("fzf_v2") 53 | } 54 | 55 | fn short(c: &mut Criterion) { 56 | bench::short(FzfV2::new(), None, group(c)); 57 | } 58 | 59 | fn medium_start(c: &mut Criterion) { 60 | bench::medium_start(FzfV2::new(), None, group(c)); 61 | } 62 | 63 | fn medium_middle(c: &mut Criterion) { 64 | bench::medium_middle(FzfV2::new(), None, group(c)); 65 | } 66 | 67 | fn medium_end(c: &mut Criterion) { 68 | bench::medium_end(FzfV2::new(), None, group(c)); 69 | } 70 | 71 | fn long_start(c: &mut Criterion) { 72 | bench::long_start(FzfV2::new(), None, group(c)); 73 | } 74 | 75 | fn long_middle(c: &mut Criterion) { 76 | bench::long_middle(FzfV2::new(), None, group(c)); 77 | } 78 | 79 | fn 
long_end(c: &mut Criterion) { 80 | bench::long_end(FzfV2::new(), None, group(c)); 81 | } 82 | 83 | criterion_group!( 84 | benches, 85 | short, 86 | medium_start, 87 | medium_middle, 88 | medium_end, 89 | long_start, 90 | long_middle, 91 | long_end, 92 | ); 93 | criterion_main!(benches); 94 | -------------------------------------------------------------------------------- /examples/cities.rs: -------------------------------------------------------------------------------- 1 | use std::ops::Range; 2 | 3 | use norm::fzf::{FzfParser, FzfV2}; 4 | use norm::Metric; 5 | 6 | fn main() { 7 | let mut fzf = FzfV2::new(); 8 | 9 | let mut parser = FzfParser::new(); 10 | 11 | let query = parser.parse("aa"); 12 | 13 | let cities = ["Geneva", "Ulaanbaatar", "New York City", "Adelaide"]; 14 | 15 | let mut results = cities 16 | .iter() 17 | .copied() 18 | .filter_map(|city| fzf.distance(query, city).map(|dist| (city, dist))) 19 | .collect::>(); 20 | 21 | results.sort_by_key(|(_city, dist)| *dist); 22 | 23 | assert_eq!(results.len(), 2); 24 | assert_eq!(results[0].0, "Adelaide"); 25 | assert_eq!(results[1].0, "Ulaanbaatar"); 26 | 27 | let mut ranges: Vec> = Vec::new(); 28 | 29 | let _ = fzf.distance_and_ranges(query, results[0].0, &mut ranges); 30 | assert_eq!(ranges.len(), 2); 31 | assert_eq!(ranges[0], 0..1); 32 | assert_eq!(ranges[1], 4..5); 33 | 34 | ranges.clear(); 35 | 36 | let _ = fzf.distance_and_ranges(query, results[1].0, &mut ranges); 37 | assert_eq!(ranges.len(), 1); 38 | assert_eq!(ranges[0], 2..4); 39 | } 40 | -------------------------------------------------------------------------------- /fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | Cargo.lock 2 | target 3 | corpus 4 | artifacts 5 | -------------------------------------------------------------------------------- /fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "norm-fuzz" 3 | version = 
"0.0.0" 4 | authors = ["Automatically generated"] 5 | publish = false 6 | edition = "2021" 7 | 8 | [package.metadata] 9 | cargo-fuzz = true 10 | 11 | [dependencies] 12 | norm = { path = "..", features = ["fzf-v1", "fzf-v2"] } 13 | libfuzzer-sys = { version = "0.4", features = ["arbitrary-derive"] } 14 | 15 | # Prevent this from interfering with workspaces 16 | [workspace] 17 | members = ["."] 18 | 19 | [[bin]] 20 | name = "fzf" 21 | path = "fuzz_targets/fzf.rs" 22 | test = false 23 | doc = false 24 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/fzf.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | 3 | use libfuzzer_sys::arbitrary::{self, Arbitrary}; 4 | use libfuzzer_sys::fuzz_target; 5 | use norm::fzf::{FzfParser, FzfScheme, FzfV1, FzfV2}; 6 | use norm::{CaseSensitivity, Metric}; 7 | 8 | #[derive(Arbitrary, Copy, Clone, Debug)] 9 | struct Query<'a>(&'a str); 10 | 11 | #[derive(Arbitrary, Clone, Debug)] 12 | struct Candidate<'a>(&'a str); 13 | 14 | fn with_opts(mut fun: F) 15 | where 16 | F: FnMut(CaseSensitivity, bool, FzfScheme), 17 | { 18 | for case_sensitivity in [ 19 | CaseSensitivity::Sensitive, 20 | CaseSensitivity::Insensitive, 21 | CaseSensitivity::Smart, 22 | ] { 23 | for normalization in [true, false] { 24 | for scheme in 25 | [FzfScheme::Default, FzfScheme::Path, FzfScheme::History] 26 | { 27 | fun(case_sensitivity, normalization, scheme) 28 | } 29 | } 30 | } 31 | } 32 | 33 | fuzz_target!(|data: (Query, Candidate)| { 34 | let (Query(query), Candidate(candidate)) = data; 35 | 36 | let mut parser = FzfParser::new(); 37 | 38 | let query = parser.parse(query); 39 | 40 | let mut fzf_v1 = FzfV1::new(); 41 | 42 | let mut fzf_v2 = FzfV2::new(); 43 | 44 | let mut ranges = Vec::new(); 45 | 46 | with_opts(|case_sensitivity, normalization, scheme| { 47 | let _ = fzf_v1 48 | .set_candidate_normalization(normalization) 49 | 
.set_case_sensitivity(case_sensitivity) 50 | .set_scoring_scheme(scheme) 51 | .distance_and_ranges(query, candidate, &mut ranges); 52 | 53 | for range in ranges.as_slice() { 54 | let _ = &candidate[range.clone()]; 55 | } 56 | 57 | let _ = fzf_v2 58 | .set_candidate_normalization(normalization) 59 | .set_case_sensitivity(case_sensitivity) 60 | .set_scoring_scheme(scheme) 61 | .distance_and_ranges(query, candidate, &mut ranges); 62 | 63 | for range in ranges.as_slice() { 64 | let _ = &candidate[range.clone()]; 65 | } 66 | }); 67 | }); 68 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | format_code_in_doc_comments = true 2 | format_strings = true 3 | group_imports = "StdExternalCrate" 4 | imports_layout = "HorizontalVertical" 5 | match_block_trailing_comma = true 6 | max_width = 79 7 | unstable_features = true 8 | use_field_init_shorthand = true 9 | use_small_heuristics = "Max" 10 | -------------------------------------------------------------------------------- /src/candidate.rs: -------------------------------------------------------------------------------- 1 | use core::ops::Range; 2 | 3 | use crate::utils::*; 4 | 5 | /// TODO: docs 6 | #[derive(Copy, Clone)] 7 | pub(crate) enum Candidate<'a> { 8 | Ascii(&'a [u8]), 9 | Unicode(&'a [char]), 10 | } 11 | 12 | impl core::fmt::Debug for Candidate<'_> { 13 | #[inline] 14 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 15 | match self { 16 | Candidate::Ascii(slice) => { 17 | core::str::from_utf8(slice).unwrap().fmt(f) 18 | }, 19 | 20 | Candidate::Unicode(slice) => { 21 | slice.iter().collect::().fmt(f) 22 | }, 23 | } 24 | } 25 | } 26 | 27 | impl<'a> Candidate<'a> { 28 | /// TODO: docs 29 | #[inline(always)] 30 | pub fn char(self, char_idx: usize) -> char { 31 | match self { 32 | Candidate::Ascii(candidate) => candidate[char_idx] as _, 33 | 
Candidate::Unicode(candidate) => candidate[char_idx],
        }
    }

    /// Returns an iterator over all the characters of the candidate, from
    /// the first to the last.
    #[inline(always)]
    pub fn chars(self) -> Chars<'a> {
        self.chars_from(0)
    }

    /// Returns an iterator over the characters of the candidate, starting
    /// from the character at `char_offset`.
    ///
    /// # Panics
    ///
    /// Panics if `char_offset` is greater than [`Self::char_len`].
    #[inline(always)]
    pub fn chars_from(self, char_offset: usize) -> Chars<'a> {
        match self {
            Candidate::Ascii(slice) => {
                Chars::Ascii(slice[char_offset..].iter())
            },
            Candidate::Unicode(slice) => {
                Chars::Unicode(slice[char_offset..].iter())
            },
        }
    }

    /// Returns the number of characters in the candidate.
    #[inline(always)]
    pub fn char_len(self) -> usize {
        match self {
            Candidate::Ascii(slice) => slice.len(),
            Candidate::Unicode(slice) => slice.len(),
        }
    }

    /// Returns the character offset of the first occurrence of `ch` at or
    /// after `char_offset`, or `None` if there is none.
    ///
    /// ASCII candidates are searched byte-wise and only honour
    /// `is_case_sensitive`; a non-ASCII `ch` can never match them. Unicode
    /// candidates compare characters with `char_eq` instead.
    ///
    /// # Panics
    ///
    /// Panics if `char_offset` is greater than [`Self::char_len`].
    #[inline(always)]
    pub fn find_first_from(
        self,
        char_offset: usize,
        ch: char,
        is_case_sensitive: bool,
        char_eq: CharEq,
    ) -> Option<usize> {
        match self {
            Candidate::Ascii(slice) => {
                if !ch.is_ascii() {
                    return None;
                }

                let slice = &slice[char_offset..];

                // The search runs on the sub-slice, so the result has to be
                // shifted back to be relative to the whole candidate.
                find_first_ascii(ch as _, slice, is_case_sensitive)
                    .map(|offset| offset + char_offset)
            },

            Candidate::Unicode(slice) => {
                let slice = &slice[char_offset..];

                find_first_unicode(ch, slice, char_eq)
                    .map(|idx| idx + char_offset)
            },
        }
    }

    /// Returns the character offset of the last occurrence of `ch` in the
    /// candidate, or `None` if there is none.
    ///
    /// See [`Self::find_first_from`] for how `is_case_sensitive` and
    /// `char_eq` are used.
    #[inline(always)]
    pub fn find_last(
        self,
        ch: char,
        is_case_sensitive: bool,
        char_eq: CharEq,
    ) -> Option<usize> {
        match self {
            Candidate::Ascii(slice) => {
                if ch.is_ascii() {
                    find_last_ascii(ch as _, slice, is_case_sensitive)
                } else {
                    None
                }
            },

            Candidate::Unicode(slice) => find_last_unicode(ch, slice, char_eq),
        }
    }

    /// Returns the character offset of the last occurrence of `ch` that
    /// lies strictly before `end_offset`, or `None` if there is none.
    ///
    /// # Panics
    ///
    /// Panics if `end_offset` is greater than [`Self::char_len`].
    #[allow(dead_code)]
    #[inline(always)]
    pub fn find_last_from(
        self,
        end_offset: usize,
        ch: char,
        is_case_sensitive: bool,
        char_eq: CharEq,
    ) -> Option<usize> {
        match self {
            Candidate::Ascii(slice) => {
                if ch.is_ascii() {
                    let slice = &slice[..end_offset];
                    find_last_ascii(ch as _, slice, is_case_sensitive)
                } else {
                    None
                }
            },

            Candidate::Unicode(slice) => {
                let slice = &slice[..end_offset];
                find_last_unicode(ch, slice, char_eq)
            },
        }
    }

    /// Returns the number of space characters (`' '`) at the start of the
    /// candidate.
    #[inline(always)]
    pub fn leading_spaces(self) -> usize {
        match self {
            Candidate::Ascii(slice) => {
                slice.iter().take_while(|&&ch| ch == b' ').count()
            },

            Candidate::Unicode(slice) => {
                slice.iter().take_while(|&&ch| ch == ' ').count()
            },
        }
    }

    /// Returns an iterator over the character offsets of every occurrence
    /// of `ch` in the candidate, in increasing order.
    #[inline(always)]
    pub fn matches(
        self,
        ch: char,
        is_case_sensitive: bool,
        char_eq: CharEq,
    ) -> CandidateMatches<'a> {
        match self {
            Candidate::Ascii(slice) => {
                CandidateMatches::from_ascii(ch, slice, is_case_sensitive, 0)
            },

            Candidate::Unicode(slice) => {
                CandidateMatches::from_unicode(ch, slice, char_eq, 0)
            },
        }
    }

    /// Like [`Self::matches`], but only considers the characters at or
    /// after `char_offset`. The yielded offsets are still relative to the
    /// start of the whole candidate.
    ///
    /// # Panics
    ///
    /// Panics if `char_offset` is greater than [`Self::char_len`].
    #[inline(always)]
    pub fn matches_from(
        self,
        char_offset: usize,
        ch: char,
        is_case_sensitive: bool,
        char_eq: CharEq,
    ) -> CandidateMatches<'a> {
        match self {
            Candidate::Ascii(slice) => {
                let slice = &slice[char_offset..];
                CandidateMatches::from_ascii(
                    ch,
                    slice,
                    is_case_sensitive,
                    char_offset,
                )
            },

            Candidate::Unicode(slice) => {
                let slice = &slice[char_offset..];
                CandidateMatches::from_unicode(ch, slice, char_eq, char_offset)
            },
        }
    }

    /// Returns the sub-candidate covering the given character range.
    ///
    /// # Panics
    ///
    /// Panics if `char_range` is out of bounds for the candidate.
    #[inline(always)]
    pub fn slice(self, char_range: Range<usize>) -> Self {
        match self {
            Candidate::Ascii(slice) => Candidate::Ascii(&slice[char_range]),

            Candidate::Unicode(slice) => {
                Candidate::Unicode(&slice[char_range])
            },
        }
    }

    /// Converts a character offset into the corresponding UTF-8 byte
    /// offset.
    ///
    /// For ASCII candidates the two coincide; for Unicode candidates the
    /// UTF-8 lengths of all the characters before `char_offset` are summed.
    #[inline(always)]
    pub fn to_byte_offset(self, char_offset: usize) -> usize {
        match self {
            Candidate::Ascii(_) => char_offset,
            Candidate::Unicode(slice) => {
                slice[..char_offset].iter().map(|&ch| ch.len_utf8()).sum()
            },
        }
    }

    /// Converts a character range into the corresponding UTF-8 byte range.
    #[inline(always)]
    pub fn to_byte_range(self, char_range: Range<usize>) -> Range<usize> {
        match self {
            Candidate::Ascii(_) => char_range,

            Candidate::Unicode(slice) => {
                let mut chars = slice[..char_range.end].iter();

                // Consume the characters before the range to get the byte
                // offset of its start..
                let start = chars
                    .by_ref()
                    .map(|&ch| ch.len_utf8())
                    .take(char_range.start)
                    .sum::<usize>();

                // ..what's left in the iterator are exactly the characters
                // inside the range.
                let end =
                    start + chars.map(|&ch| ch.len_utf8()).sum::<usize>();

                start..end
            },
        }
    }

    /// Returns the number of space characters (`' '`) at the end of the
    /// candidate.
    #[inline(always)]
    pub fn trailing_spaces(self) -> usize {
        match self {
            Candidate::Ascii(slice) => {
                slice.iter().rev().take_while(|&&ch| ch == b' ').count()
            },

            Candidate::Unicode(slice) => {
                slice.iter().rev().take_while(|&&ch| ch == ' ').count()
            },
        }
    }
}

/// Returns the index of the first byte of `haystack` that matches `needle`.
///
/// When the search is case-insensitive and `needle` is an ASCII letter, both
/// the needle and its case-flipped counterpart are searched for.
#[inline(always)]
fn find_first_ascii(
    needle: u8,
    haystack: &[u8],
    is_case_sensitive: bool,
) -> Option<usize> {
    if is_case_sensitive || !needle.is_ascii_alphabetic() {
        memchr::memchr(needle, haystack)
    } else {
        memchr::memchr2(needle, ascii_letter_flip_case(needle), haystack)
    }
}

/// Returns the index of the last byte of `haystack` that matches `needle`.
///
/// Case sensitivity is handled like in [`find_first_ascii`].
#[inline(always)]
fn find_last_ascii(
    needle: u8,
    haystack: &[u8],
    is_case_sensitive: bool,
) -> Option<usize> {
    if is_case_sensitive || !needle.is_ascii_alphabetic() {
        memchr::memrchr(needle, haystack)
    } else {
        memchr::memrchr2(needle, ascii_letter_flip_case(needle), haystack)
    }
}

/// Returns the index of the first character of `haystack` for which
/// `char_eq(needle, ch)` holds.
#[inline(always)]
fn find_first_unicode(
    needle: char,
    haystack: &[char],
    char_eq: CharEq,
) -> Option<usize> {
    haystack.iter().position(|&ch| char_eq(needle, ch))
}

/// Returns the index of the last character of `haystack` for which
/// `char_eq(needle, ch)` holds.
#[inline(always)]
fn find_last_unicode(
    needle: char,
    haystack: &[char],
    char_eq: CharEq,
) -> Option<usize> {
    haystack.iter().rposition(|&ch| char_eq(needle, ch))
}

/// An iterator over the characters of a [`Candidate`], yielding `char`s
/// regardless of whether the candidate is stored as bytes or as characters.
pub(crate) enum Chars<'a> {
    Ascii(core::slice::Iter<'a, u8>),
    Unicode(core::slice::Iter<'a, char>),
}

impl Iterator for Chars<'_> {
    type Item = char;

    #[inline(always)]
    fn next(&mut self) -> Option<Self::Item> {
        match self {
            Chars::Ascii(iter) => iter.next().copied().map(char::from),
            Chars::Unicode(iter) => iter.next().copied(),
        }
    }
}

impl DoubleEndedIterator for Chars<'_> {
    #[inline(always)]
    fn next_back(&mut self) -> Option<Self::Item> {
        match self {
            Chars::Ascii(iter) => iter.next_back().copied().map(char::from),
            Chars::Unicode(iter) => iter.next_back().copied(),
        }
    }
}

/// An iterator over the character offsets at which a given character occurs
/// in a [`Candidate`].
pub(crate) struct CandidateMatches<'a> {
    /// The iterator doing the actual search over the (sub-)haystack.
    iter: CandidateMatchesIter<'a>,

    /// The value added to every offset yielded by `iter`, so that the final
    /// offsets are relative to the start of the whole candidate.
    start_offset: usize,
}

impl<'a> CandidateMatches<'a> {
    /// Creates the iterator for an ASCII haystack.
    ///
    /// A non-ASCII `needle` can never occur in an ASCII haystack, so in that
    /// case the returned iterator is empty.
    #[inline(always)]
    fn from_ascii(
        needle: char,
        haystack: &'a [u8],
        is_case_sensitive: bool,
        start_offset: usize,
    ) -> Self {
        if !needle.is_ascii() {
            return Self::from_unicode(needle, &[], char_eq(false, false), 0);
        }

        let needle = needle as u8;

        let iter = if is_case_sensitive || !needle.is_ascii_alphabetic() {
            CandidateMatchesIter::Memchr(memchr::Memchr::new(needle, haystack))
        } else {
            CandidateMatchesIter::Memchr2(memchr::Memchr2::new(
                needle,
                ascii_letter_flip_case(needle),
                haystack,
            ))
        };

        Self { iter, start_offset }
    }

    /// Creates the iterator for a Unicode haystack, where characters are
    /// compared with `char_eq`.
    #[inline(always)]
    fn from_unicode(
        needle: char,
        haystack: &'a [char],
        char_eq: CharEq,
        start_offset: usize,
    ) -> Self {
        let iter = UnicodeMatches::new(needle, haystack, char_eq);
        Self { iter: CandidateMatchesIter::Unicode(iter), start_offset }
    }
}

/// The search strategies backing a [`CandidateMatches`].
enum CandidateMatchesIter<'a> {
    /// Single-byte search over an ASCII haystack.
    Memchr(memchr::Memchr<'a>),

    /// Two-byte search over an ASCII haystack, used to match both cases of
    /// an ASCII letter.
    Memchr2(memchr::Memchr2<'a>),

    /// Linear search over a Unicode haystack.
    Unicode(UnicodeMatches<'a>),
}

impl Iterator for CandidateMatches<'_> {
    type Item = usize;

    #[inline(always)]
    fn next(&mut self) -> Option<Self::Item> {
        match &mut self.iter {
            CandidateMatchesIter::Memchr(memchr) => memchr.next(),
            CandidateMatchesIter::Memchr2(memchr2) => memchr2.next(),
            CandidateMatchesIter::Unicode(unicode) => unicode.next(),
        }
        .map(|offset| self.start_offset + offset)
    }
}

/// An iterator over the indices of the characters of a `&[char]` haystack
/// that are equal to a needle according to a [`CharEq`] function.
struct UnicodeMatches<'a> {
    /// The character being searched for.
    needle: char,

    /// The part of the haystack that hasn't been searched yet.
    haystack: &'a [char],

    /// The function used to compare the needle with each character.
    char_eq: CharEq,

    /// The number of characters of the original haystack that have already
    /// been consumed.
    offset: usize,
}

impl<'a> UnicodeMatches<'a> {
    fn new(ch: char, haystack: &'a [char], char_eq: CharEq) -> Self {
        Self { needle: ch, haystack, char_eq, offset: 0 }
    }
}

impl Iterator for UnicodeMatches<'_> {
    type Item = usize;

    #[inline(always)]
    fn next(&mut self) -> Option<Self::Item> {
        let idx = self
            .haystack
            .iter()
            .position(|&ch| (self.char_eq)(self.needle, ch))?;

        // Resume the next search right after the match we just found.
        self.haystack = &self.haystack[idx + 1..];

        let offset = self.offset + idx;

        self.offset = offset + 1;

        Some(offset)
    }
}
-------------------------------------------------------------------------------- /src/case_sensitivity.rs: -------------------------------------------------------------------------------- 1 | /// Case sensitivity modes for search. 2 | /// 3 | /// This defines the different types of case sensitivity that can be used when 4 | /// searching for the characters of a query in a candidate string. 5 | #[derive(Copy, Clone, Debug, Default)] 6 | pub enum CaseSensitivity { 7 | /// The search is case-sensitive. For a successful match the case of the 8 | /// letters in the candidate must exactly match the case of the letters in 9 | /// the query. 10 | Sensitive, 11 | 12 | /// The search is case-insensitive. In this mode, the case of letters is 13 | /// ignored, allowing for matches regardless of whether the letters in the 14 | /// query and candidate are upper or lower case. 15 | Insensitive, 16 | 17 | /// In this mode, the case-sensitivity of the search is determined 18 | /// dynamically based on the letters of the query. If the query contains 19 | /// one or more uppercase letters the search is treated as case-sensitive, 20 | /// otherwise it's case-insensitive. 21 | #[default] 22 | Smart, 23 | } 24 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! This crate provides a collection of different distance metrics on strings. 2 | //! 3 | //! This problem is sometimes referred to as "string similarity search", or 4 | //! more colloquially "fuzzy matching". Given a query string and a number of 5 | //! possible candidate strings, the goal is to: 6 | //! 7 | //! a) filter out the candidates that are too dissimilar from the query; 8 | //! 9 | //! b) rank the remaining candidates by their similarity to the query. 10 | //! 11 | //! Here both of these tasks are accomplished by implementing the [`Metric`] 12 | //! trait. 
This trait is at the basis of norm's design, and it is implemented 13 | //! by all of our metrics. Reading its documentation is a good place to start. 14 | //! 15 | //! # Performance 16 | //! 17 | //! Performance is a top priority for this crate. Our goal is to have the 18 | //! fastest implementation of every metric algorithm we provide, across all 19 | //! languages. [Here][bench] you can find a number of benchmarks comparing 20 | //! norm's metrics to each other, as well as to other popular libraries. 21 | //! 22 | //! # Examples 23 | //! 24 | //! ```rust 25 | //! # use core::ops::Range; 26 | //! use norm::fzf::{FzfParser, FzfV2}; 27 | //! use norm::Metric; 28 | //! 29 | //! let mut fzf = FzfV2::new(); 30 | //! 31 | //! let mut parser = FzfParser::new(); 32 | //! 33 | //! let query = parser.parse("aa"); 34 | //! 35 | //! let cities = ["Geneva", "Ulaanbaatar", "New York City", "Adelaide"]; 36 | //! 37 | //! let mut results = cities 38 | //! .iter() 39 | //! .copied() 40 | //! .filter_map(|city| fzf.distance(query, city).map(|dist| (city, dist))) 41 | //! .collect::>(); 42 | //! 43 | //! // We sort the results by distance in ascending order, so that the best 44 | //! // match will be at the front of the vector. 45 | //! results.sort_by_key(|(_city, dist)| *dist); 46 | //! 47 | //! assert_eq!(results.len(), 2); 48 | //! assert_eq!(results[0].0, "Adelaide"); 49 | //! assert_eq!(results[1].0, "Ulaanbaatar"); 50 | //! 51 | //! // We can also find out which sub-strings of each candidate matched the 52 | //! // query. 53 | //! 54 | //! let mut ranges: Vec> = Vec::new(); 55 | //! 56 | //! let _ = fzf.distance_and_ranges(query, results[0].0, &mut ranges); 57 | //! assert_eq!(ranges.len(), 2); 58 | //! assert_eq!(ranges[0], 0..1); // "A" in "Adelaide" 59 | //! assert_eq!(ranges[1], 4..5); // "a" in "Adelaide" 60 | //! 61 | //! ranges.clear(); 62 | //! 63 | //! let _ = fzf.distance_and_ranges(query, results[1].0, &mut ranges); 64 | //! assert_eq!(ranges.len(), 1); 65 | //! 
assert_eq!(ranges[0], 2..4); // The first "aa" in "Ulaanbaatar" 66 | //! ``` 67 | //! 68 | //! # Features flags 69 | //! 70 | //! - `fzf-v1`: enables the [`FzfV1`](metrics::fzf::FzfV1) metric; 71 | //! - `fzf-v2`: enables the [`FzfV2`](metrics::fzf::FzfV2) metric; 72 | //! 73 | //! [bench]: https://github.com/noib3/fuzzy-benches 74 | 75 | #![cfg_attr(docsrs, feature(doc_cfg))] 76 | #![allow(clippy::module_inception)] 77 | #![allow(clippy::needless_range_loop)] 78 | #![allow(clippy::too_many_arguments)] 79 | #![deny(missing_docs)] 80 | #![deny(rustdoc::broken_intra_doc_links)] 81 | #![deny(rustdoc::private_intra_doc_links)] 82 | 83 | extern crate alloc; 84 | 85 | #[cfg(feature = "__any-metric")] 86 | mod candidate; 87 | mod case_sensitivity; 88 | #[cfg(feature = "__any-metric")] 89 | mod matched_ranges; 90 | mod metric; 91 | #[cfg(feature = "__any-metric")] 92 | mod metrics; 93 | #[cfg(feature = "__any-metric")] 94 | mod normalize; 95 | #[cfg(feature = "__any-metric")] 96 | mod utils; 97 | 98 | #[cfg(feature = "__any-metric")] 99 | use candidate::{Candidate, CandidateMatches}; 100 | pub use case_sensitivity::CaseSensitivity; 101 | #[cfg(feature = "__any-metric")] 102 | use matched_ranges::MatchedRanges; 103 | pub use metric::Metric; 104 | #[cfg(feature = "__any-metric")] 105 | pub use metrics::*; 106 | -------------------------------------------------------------------------------- /src/matched_ranges.rs: -------------------------------------------------------------------------------- 1 | use core::cmp::Ordering; 2 | use core::ops::Range; 3 | 4 | /// TODO: docs 5 | pub(crate) struct MatchedRanges<'a> { 6 | ranges: &'a mut Vec>, 7 | initial_len: usize, 8 | } 9 | 10 | impl<'a> From<&'a mut Vec>> for MatchedRanges<'a> { 11 | #[inline(always)] 12 | fn from(ranges: &'a mut Vec>) -> Self { 13 | let initial_len = ranges.len(); 14 | Self { ranges, initial_len } 15 | } 16 | } 17 | 18 | impl core::fmt::Debug for MatchedRanges<'_> { 19 | fn fmt(&self, f: &mut 
core::fmt::Formatter<'_>) -> core::fmt::Result {
        use core::fmt::Write;

        // `initial` holds the ranges that were already in the buffer when
        // this `MatchedRanges` was created, `this` the ones inserted since.
        let (initial, this) = self.ranges.split_at(self.initial_len);

        if this.is_empty() {
            return core::fmt::Debug::fmt(initial, f);
        }

        f.write_char('[')?;

        for (idx, range) in initial.iter().enumerate() {
            if idx > 0 {
                f.write_str(", ")?;
            }
            write!(f, "{range:?}")?;
        }

        f.write_str(" | ")?;

        for (idx, range) in this.iter().enumerate() {
            if idx > 0 {
                f.write_str(", ")?;
            }
            write!(f, "{range:?}")?;
        }

        f.write_char(']')?;

        Ok(())
    }
}

impl<'a> MatchedRanges<'a> {
    /// Binary searches the ranges inserted through this `MatchedRanges`,
    /// ignoring the ones that were already in the buffer when it was
    /// created. The returned index is relative to the first inserted range.
    #[inline(always)]
    fn binary_search_by<'r, F>(&'r self, fun: F) -> Result<usize, usize>
    where
        F: FnMut(&'r Range<usize>) -> Ordering,
    {
        self.ranges[self.initial_len..].binary_search_by(fun)
    }

    /// Inserts a new range, keeping the inserted ranges sorted by their
    /// start and merging the new range with every existing range it
    /// overlaps or touches.
    ///
    /// Ranges that were already in the buffer when this `MatchedRanges` was
    /// created are never modified.
    #[inline(always)]
    pub(crate) fn insert(&mut self, new_range: Range<usize>) {
        // Index of the first range starting at or after the new one.
        let idx = match self
            .binary_search_by(|range| range.start.cmp(&new_range.start))
        {
            Ok(idx) | Err(idx) => idx,
        };

        let tracked = &self.ranges[self.initial_len..];

        // The range right before `idx` starts before the new range, but it
        // has to be merged too if it reaches (or goes past) its start.
        //
        // ------            =>  ---------
        //     xxxxx
        let first = match idx.checked_sub(1) {
            Some(prev) if tracked[prev].end >= new_range.start => prev,
            _ => idx,
        };

        // Every range from `idx` onwards that starts before (or right at)
        // the end of the new range gets absorbed by it.
        //
        //      ---  ---  ------  =>  ----------------
        //    xxxxxxxxxxxxx
        let last = idx
            + tracked[idx..]
                .iter()
                .take_while(|range| range.start <= new_range.end)
                .count();

        if first == last {
            // The new range is strictly within a gap, so we just insert it.
            //
            // ------       ------  =>  ------ ----- ------
            //        xxxxx
            self.insert_at(first, new_range);
            return;
        }

        let merged_start = new_range.start.min(tracked[first].start);
        let merged_end = new_range.end.max(tracked[last - 1].end);

        let base = self.initial_len;
        self.ranges[base + first] = merged_start..merged_end;
        self.ranges.drain(base + first + 1..base + last);
    }

    /// Inserts `range` at the given index, which is relative to the first
    /// range inserted through this `MatchedRanges`.
    #[inline(always)]
    fn insert_at(&mut self, idx: usize, range: Range<usize>) {
        self.ranges.insert(self.initial_len + idx, range);
    }
}

#[cfg(test)]
mod tests {
    #![allow(clippy::single_range_in_vec_init)]

    use super::*;

    impl<'a> MatchedRanges<'a> {
        fn as_slice(&self) -> &[Range<usize>] {
            &self.ranges[..]
235 | } 236 | } 237 | 238 | fn ranges() -> MatchedRanges<'static> { 239 | let vec = Box::leak(Box::default()); 240 | MatchedRanges::from(vec) 241 | } 242 | 243 | #[test] 244 | fn matched_ranges_insert_same_start_increasing_end() { 245 | let mut ranges = ranges(); 246 | ranges.insert(0..1); 247 | ranges.insert(0..2); 248 | ranges.insert(0..3); 249 | assert_eq!(ranges.as_slice(), [0..3]); 250 | ranges.insert(0..2); 251 | assert_eq!(ranges.as_slice(), [0..3]); 252 | } 253 | 254 | #[test] 255 | fn matched_ranges_insert_consecutive_1() { 256 | let mut ranges = ranges(); 257 | ranges.insert(0..1); 258 | ranges.insert(1..2); 259 | ranges.insert(2..3); 260 | assert_eq!(ranges.as_slice(), [0..3]); 261 | } 262 | 263 | #[test] 264 | fn matched_ranges_insert_consecutive_2() { 265 | let mut ranges = ranges(); 266 | ranges.insert(2..3); 267 | ranges.insert(1..2); 268 | ranges.insert(0..1); 269 | assert_eq!(ranges.as_slice(), [0..3]); 270 | } 271 | 272 | #[test] 273 | fn matched_ranges_insert_fill_gap() { 274 | let mut ranges = ranges(); 275 | ranges.insert(0..1); 276 | ranges.insert(2..3); 277 | assert_eq!(ranges.as_slice(), [0..1, 2..3]); 278 | ranges.insert(1..2); 279 | assert_eq!(ranges.as_slice(), [0..3]); 280 | } 281 | 282 | #[test] 283 | fn matched_ranges_insert_extend_end() { 284 | let mut ranges = ranges(); 285 | ranges.insert(0..2); 286 | ranges.insert(4..6); 287 | ranges.insert(1..3); 288 | assert_eq!(ranges.as_slice(), [0..3, 4..6]); 289 | } 290 | 291 | #[test] 292 | fn matched_ranges_insert_extend_start() { 293 | let mut ranges = ranges(); 294 | ranges.insert(0..2); 295 | ranges.insert(4..6); 296 | ranges.insert(3..5); 297 | assert_eq!(ranges.as_slice(), [0..2, 3..6]); 298 | } 299 | 300 | #[test] 301 | fn matched_ranges_insert_in_gap() { 302 | let mut ranges = ranges(); 303 | ranges.insert(0..4); 304 | ranges.insert(6..8); 305 | ranges.insert(10..14); 306 | assert_eq!(ranges.as_slice(), [0..4, 6..8, 10..14]); 307 | } 308 | 309 | #[test] 310 | fn 
matched_ranges_insert_smaller_1() { 311 | let mut ranges = ranges(); 312 | ranges.insert(3..8); 313 | ranges.insert(4..7); 314 | assert_eq!(ranges.as_slice(), [3..8]); 315 | ranges.insert(5..6); 316 | assert_eq!(ranges.as_slice(), [3..8]); 317 | } 318 | 319 | #[test] 320 | fn matched_ranges_insert_smaller_2() { 321 | let mut ranges = ranges(); 322 | ranges.insert(1..2); 323 | ranges.insert(3..8); 324 | ranges.insert(4..7); 325 | assert_eq!(ranges.as_slice(), [1..2, 3..8]); 326 | ranges.insert(5..6); 327 | assert_eq!(ranges.as_slice(), [1..2, 3..8]); 328 | } 329 | 330 | #[test] 331 | fn matched_ranges_insert_smaller_3() { 332 | let mut ranges = ranges(); 333 | ranges.insert(10..11); 334 | ranges.insert(3..8); 335 | ranges.insert(4..7); 336 | assert_eq!(ranges.as_slice(), [3..8, 10..11]); 337 | ranges.insert(5..6); 338 | assert_eq!(ranges.as_slice(), [3..8, 10..11]); 339 | } 340 | 341 | #[test] 342 | fn matched_ranges_insert_smaller_4() { 343 | let mut ranges = ranges(); 344 | ranges.insert(1..2); 345 | ranges.insert(10..11); 346 | ranges.insert(3..8); 347 | ranges.insert(4..7); 348 | assert_eq!(ranges.as_slice(), [1..2, 3..8, 10..11]); 349 | ranges.insert(5..6); 350 | assert_eq!(ranges.as_slice(), [1..2, 3..8, 10..11]); 351 | } 352 | } 353 | -------------------------------------------------------------------------------- /src/metric.rs: -------------------------------------------------------------------------------- 1 | use core::ops::Range; 2 | 3 | /// A trait representing a distance metric on strings. 4 | /// 5 | /// This trait encapsulates the logic for comparing a query to a candidate 6 | /// string. It allows to filter out non-matches, to sort the remaining 7 | /// candidates based on the quality of the match, and to show which sub-strings 8 | /// of a candidate matched the query. 9 | pub trait Metric { 10 | /// The type of query to be found in the candidate. 
11 | /// 12 | /// This is generic over an associated lifetime `'a` to allow for zero-copy 13 | /// parsing of the query. Metrics that don't parse queries can simply use 14 | /// a `&'a str` here. 15 | type Query<'a>; 16 | 17 | /// The type that expresses how closely a candidate matches the query. 18 | /// 19 | /// In order to behave like a distance, its [`Ord`] implementation must be 20 | /// such that given two candidates `a` and `b`, it holds that 21 | /// 22 | /// ``` 23 | /// # use core::cmp::Ordering; 24 | /// # let a_distance = 0; 25 | /// # let b_distance = 1; 26 | /// # let _ = 27 | /// a_distance.cmp(&b_distance) == Ordering::Less 28 | /// # ; 29 | /// ``` 30 | /// 31 | /// if and only if `a` is a better match than `b`. In other words, a lower 32 | /// distance value must indicate a more relevant match. 33 | type Distance: Ord; 34 | 35 | /// This method calculates the "distance" between an instance of the 36 | /// metric's [`Query`][Self::Query] type and a candidate string. 37 | /// 38 | /// A return value of `Some(distance)` means that the metric considers the 39 | /// candidate to be a match for the query, with the `distance` being the 40 | /// measure of how good the match is: the better the match, the lower the 41 | /// distance. 42 | /// 43 | /// A return value of `None` means that the candidate does not match the 44 | /// query and should be filtered out of the search results. 45 | fn distance( 46 | &mut self, 47 | query: Self::Query<'_>, 48 | candidate: &str, 49 | ) -> Option; 50 | 51 | /// This method has the same semantics and return value as 52 | /// [`Self::distance`], but in the case of a match it also appends the 53 | /// **byte** ranges of the candidate that matched the query to the provided 54 | /// buffer. 55 | /// 56 | /// The appended ranges are guaranteed to be non-overlapping, but the order 57 | /// in which they are appended is not specified by this trait's contract. 
58 | /// Any [`Metric`] implementation is free to choose its order as long as 59 | /// the ranges don't overlap. 60 | /// 61 | /// If the candidate doesn't match the query, the buffer is left untouched. 62 | /// 63 | /// # Examples 64 | /// 65 | /// ```rust 66 | /// # use norm::fzf::{FzfV1, FzfParser}; 67 | /// # use norm::Metric; 68 | /// let mut fzf = FzfV1::new(); 69 | /// let mut parser = FzfParser::new(); 70 | /// let mut ranges = Vec::new(); 71 | /// 72 | /// let query = parser.parse("foo"); 73 | /// 74 | /// let distance = fzf.distance_and_ranges(query, "bar", &mut ranges); 75 | /// 76 | /// assert!(distance.is_none()); 77 | /// 78 | /// // The candidate wasn't a match, so `ranges` is still empty. 79 | /// assert!(ranges.is_empty()); 80 | /// ``` 81 | /// 82 | /// ```rust 83 | /// # use norm::fzf::{FzfV1, FzfParser}; 84 | /// # use norm::Metric; 85 | /// let mut fzf = FzfV1::new(); 86 | /// let mut parser = FzfParser::new(); 87 | /// let mut ranges = Vec::new(); 88 | /// 89 | /// let query = parser.parse("foo"); 90 | /// 91 | /// let _ = fzf.distance_and_ranges(query, "seafood", &mut ranges); 92 | /// 93 | /// // There was a match, so the vector should now contain the byte range 94 | /// // of "foo" in "seafood". 95 | /// assert_eq!(ranges, [3..6]); 96 | /// 97 | /// let _ = fzf.distance_and_ranges(query, "fancy igloo", &mut ranges); 98 | /// 99 | /// // You can call `distance_and_ranges` multiple times with the same 100 | /// // vector, and it will keep appending to it. 101 | /// // 102 | /// // In this case, it appended the byte ranges of "f" and "oo" in 103 | /// // "fancy igloo". 
104 | /// assert_eq!(ranges, [3..6, 0..1, 9..11]); 105 | /// ``` 106 | /// 107 | /// ```rust 108 | /// # use norm::fzf::{FzfV1, FzfParser}; 109 | /// # use norm::Metric; 110 | /// let mut fzf = FzfV1::new(); 111 | /// let mut parser = FzfParser::new(); 112 | /// let mut ranges = Vec::new(); 113 | /// 114 | /// fzf.set_candidate_normalization(true); 115 | /// 116 | /// let query = parser.parse("foo"); 117 | /// 118 | /// let _ = fzf.distance_and_ranges(query, "ƒöö", &mut ranges); 119 | /// 120 | /// // The start and end of each range are always byte offsets, not 121 | /// // character offsets. 122 | /// assert_eq!(ranges, [0..6]); 123 | /// ``` 124 | fn distance_and_ranges( 125 | &mut self, 126 | query: Self::Query<'_>, 127 | candidate: &str, 128 | ranges_buf: &mut Vec>, 129 | ) -> Option; 130 | } 131 | -------------------------------------------------------------------------------- /src/metrics/fzf/candidate.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use crate::utils::*; 3 | use crate::{Candidate, CandidateMatches}; 4 | 5 | /// TODO: docs 6 | pub(super) struct CandidateV2<'a> { 7 | /// TODO: docs 8 | bonuses: &'a mut [Bonus], 9 | 10 | /// TODO: docs 11 | base: Candidate<'a>, 12 | 13 | /// TODO: docs 14 | initial_char_class: CharClass, 15 | 16 | /// TODO: docs 17 | opts: CandidateOpts, 18 | } 19 | 20 | /// TODO: docs 21 | #[derive(Clone, Copy)] 22 | pub(super) struct CandidateOpts { 23 | /// TODO: docs 24 | pub char_eq: CharEq, 25 | 26 | /// TODO: docs 27 | pub is_case_sensitive: bool, 28 | } 29 | 30 | impl Default for CandidateOpts { 31 | #[inline(always)] 32 | fn default() -> Self { 33 | Self { char_eq: char_eq(false, false), is_case_sensitive: false } 34 | } 35 | } 36 | 37 | impl CandidateOpts { 38 | #[inline(always)] 39 | pub fn new(is_case_sensitive: bool, is_normalized: bool) -> Self { 40 | Self { 41 | char_eq: char_eq(is_case_sensitive, is_normalized), 42 | is_case_sensitive, 43 | } 44 | } 45 | } 
46 | 47 | impl<'a> CandidateV2<'a> { 48 | #[inline(always)] 49 | pub fn bonus_at(&mut self, char_idx: usize, scheme: &Scheme) -> Score { 50 | let bonus = &mut self.bonuses[char_idx]; 51 | 52 | if bonus.is_set() { 53 | return bonus.value(); 54 | } 55 | 56 | let prev_class = if char_idx == 0 { 57 | self.initial_char_class 58 | } else { 59 | char_class(self.char(char_idx - 1), scheme) 60 | }; 61 | 62 | let this_class = char_class(self.char(char_idx), scheme); 63 | 64 | let bonus = &mut self.bonuses[char_idx]; 65 | 66 | bonus.set(compute_bonus(prev_class, this_class, scheme)); 67 | 68 | bonus.value() 69 | } 70 | 71 | #[inline(always)] 72 | pub fn char(&self, char_idx: usize) -> char { 73 | self.base.char(char_idx) 74 | } 75 | 76 | #[inline(always)] 77 | pub fn char_len(&self) -> usize { 78 | self.base.char_len() 79 | } 80 | 81 | #[inline(always)] 82 | pub fn into_base(self) -> Candidate<'a> { 83 | self.base 84 | } 85 | 86 | #[inline(always)] 87 | pub fn matches(&self, ch: char) -> CandidateMatches<'a> { 88 | self.base.matches(ch, self.opts.is_case_sensitive, self.opts.char_eq) 89 | } 90 | 91 | #[inline(always)] 92 | pub fn matches_from( 93 | &self, 94 | char_offset: usize, 95 | ch: char, 96 | ) -> CandidateMatches<'a> { 97 | self.base.matches_from( 98 | char_offset, 99 | ch, 100 | self.opts.is_case_sensitive, 101 | self.opts.char_eq, 102 | ) 103 | } 104 | 105 | #[inline(always)] 106 | pub fn new( 107 | base: Candidate<'a>, 108 | bonus_slab: &'a mut BonusSlab, 109 | initial_char_class: CharClass, 110 | opts: CandidateOpts, 111 | ) -> Self { 112 | let bonuses = bonus_slab.alloc(base.char_len()); 113 | Self { base, bonuses, initial_char_class, opts } 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /src/metrics/fzf/distance.rs: -------------------------------------------------------------------------------- 1 | pub(super) type Score = i64; 2 | 3 | /// The fzf distance type. 
4 | /// 5 | /// This struct is returned by [`FzfV1`](super::FzfV1) and 6 | /// [`FzfV2`](super::FzfV2)'s [`Metric`](crate::Metric) implementations. 7 | #[derive(Debug, Clone, Copy, Eq, PartialEq)] 8 | pub struct FzfDistance(Score); 9 | 10 | impl PartialOrd for FzfDistance { 11 | #[inline] 12 | fn partial_cmp(&self, other: &Self) -> Option { 13 | Some(self.cmp(other)) 14 | } 15 | } 16 | 17 | impl Ord for FzfDistance { 18 | #[inline] 19 | fn cmp(&self, other: &Self) -> core::cmp::Ordering { 20 | // This makes the type act like a distance and not like a score. 21 | other.0.cmp(&self.0) 22 | } 23 | } 24 | 25 | impl Default for FzfDistance { 26 | #[inline] 27 | fn default() -> Self { 28 | Self::from_score(0) 29 | } 30 | } 31 | 32 | impl FzfDistance { 33 | /// Creates a new [`FzfDistance`] from a score. 34 | #[inline(always)] 35 | pub(super) fn from_score(score: Score) -> Self { 36 | Self(score) 37 | } 38 | 39 | /// Returns a score representation of the distance. 40 | /// 41 | /// This is not part of the public API and should not be relied upon. 42 | /// 43 | /// It's only used internally for testing and debugging purposes. 
44 | #[cfg(any(feature = "__into-score", feature = "__tests"))] 45 | #[inline(always)] 46 | pub fn into_score(self) -> Score { 47 | self.0 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/metrics/fzf/fzf.rs: -------------------------------------------------------------------------------- 1 | use core::ops::Range; 2 | 3 | use super::{query::*, *}; 4 | use crate::utils::CharEq; 5 | use crate::*; 6 | 7 | /// TODO: docs 8 | pub(super) trait Fzf { 9 | /// TODO: docs 10 | fn alloc_chars<'a>(&mut self, candidate: &str) -> &'a [char]; 11 | 12 | /// TODO: docs 13 | fn char_eq(&self, pattern: Pattern) -> CharEq; 14 | 15 | /// TODO: docs 16 | fn scheme(&self) -> &Scheme; 17 | 18 | /// TODO: docs 19 | fn fuzzy( 20 | &mut self, 21 | pattern: Pattern, 22 | candidate: Candidate, 23 | ranges: &mut MatchedRanges, 24 | ) -> Option; 25 | 26 | /// TODO: docs 27 | fn score( 28 | &mut self, 29 | pattern: Pattern, 30 | candidate: Candidate, 31 | ranges: &mut MatchedRanges, 32 | ) -> Option { 33 | let score = match pattern.match_type { 34 | MatchType::Fuzzy => { 35 | if pattern.is_inverse { 36 | self.fuzzy::(pattern, candidate, ranges) 37 | } else { 38 | self.fuzzy::(pattern, candidate, ranges) 39 | } 40 | }, 41 | 42 | MatchType::Exact => { 43 | let char_eq = self.char_eq(pattern); 44 | 45 | if pattern.is_inverse { 46 | exact_match::( 47 | pattern, 48 | candidate, 49 | char_eq, 50 | self.scheme(), 51 | ranges, 52 | ) 53 | } else { 54 | exact_match::( 55 | pattern, 56 | candidate, 57 | char_eq, 58 | self.scheme(), 59 | ranges, 60 | ) 61 | } 62 | }, 63 | 64 | MatchType::PrefixExact => { 65 | let char_eq = self.char_eq(pattern); 66 | 67 | if pattern.is_inverse { 68 | prefix_match::( 69 | pattern, 70 | candidate, 71 | char_eq, 72 | self.scheme(), 73 | ranges, 74 | ) 75 | } else { 76 | prefix_match::( 77 | pattern, 78 | candidate, 79 | char_eq, 80 | self.scheme(), 81 | ranges, 82 | ) 83 | } 84 | }, 85 | 86 | MatchType::SuffixExact => { 87 
| let char_eq = self.char_eq(pattern); 88 | 89 | if pattern.is_inverse { 90 | suffix_match::( 91 | pattern, 92 | candidate, 93 | char_eq, 94 | self.scheme(), 95 | ranges, 96 | ) 97 | } else { 98 | suffix_match::( 99 | pattern, 100 | candidate, 101 | char_eq, 102 | self.scheme(), 103 | ranges, 104 | ) 105 | } 106 | }, 107 | 108 | MatchType::EqualExact => { 109 | let char_eq = self.char_eq(pattern); 110 | 111 | if pattern.is_inverse { 112 | equal_match::( 113 | pattern, 114 | candidate, 115 | char_eq, 116 | self.scheme(), 117 | ranges, 118 | ) 119 | } else { 120 | equal_match::( 121 | pattern, 122 | candidate, 123 | char_eq, 124 | self.scheme(), 125 | ranges, 126 | ) 127 | } 128 | }, 129 | }; 130 | 131 | match (score.is_some(), pattern.is_inverse) { 132 | (true, false) => score, 133 | (false, true) => Some(0), 134 | _ => None, 135 | } 136 | } 137 | 138 | /// TODO: docs 139 | #[inline(always)] 140 | fn distance( 141 | &mut self, 142 | query: FzfQuery, 143 | candidate: &str, 144 | ranges: &mut Vec>, 145 | ) -> Option { 146 | if query.is_empty() { 147 | return Some(FzfDistance::from_score(0)); 148 | } 149 | 150 | let candidate = if candidate.is_ascii() { 151 | Candidate::Ascii(candidate.as_bytes()) 152 | } else { 153 | Candidate::Unicode(self.alloc_chars(candidate)) 154 | }; 155 | 156 | let ranges = &mut ranges.into(); 157 | 158 | match query.search_mode { 159 | SearchMode::NotExtended(pattern) => self 160 | .fuzzy::(pattern, candidate, ranges) 161 | .map(FzfDistance::from_score), 162 | 163 | SearchMode::Extended(conditions) => { 164 | let mut total_score: Score = 0; 165 | for condition in conditions { 166 | total_score += condition.iter().find_map(|pattern| { 167 | self.score::(pattern, candidate, ranges) 168 | })?; 169 | } 170 | Some(FzfDistance::from_score(total_score)) 171 | }, 172 | } 173 | } 174 | } 175 | 176 | /// TODO: docs 177 | #[inline] 178 | fn exact_match( 179 | pattern: Pattern, 180 | candidate: Candidate, 181 | char_eq: CharEq, 182 | scheme: &Scheme, 183 
| ranges: &mut MatchedRanges, 184 | ) -> Option { 185 | if pattern.is_empty() { 186 | return Some(0); 187 | } 188 | 189 | // The highest first-char bonus among the occurrences found so far (-1 if none). 190 | let mut best_bonus: i64 = -1; 191 | 192 | // Char offset at which the occurrence with the highest bonus starts. 193 | let mut best_bonus_char_start = 0; 194 | 195 | // Char offset right after the end of the occurrence with the highest bonus. 196 | let mut best_bonus_char_end = 0; 197 | 198 | // Whether at least one full occurrence of the pattern has been found. 199 | let mut matched = false; 200 | 201 | let mut prev_class = scheme.initial_char_class; 202 | 203 | let mut start_offset = 0; 204 | 205 | 'outer: loop { 206 | let current_start_offset = start_offset; 207 | let mut bonus_start = 0; 208 | let mut current_bonus: Score = 0; 209 | let mut pattern_char_idx = 0; 210 | 211 | let mut chars = candidate.chars_from(start_offset).enumerate(); 212 | 213 | for (char_offset, candidate_ch) in chars.by_ref() { 214 | let pattern_ch = pattern.char(pattern_char_idx); 215 | 216 | let char_class = char_class(candidate_ch, scheme); 217 | 218 | if (char_eq)(pattern_ch, candidate_ch) { 219 | if pattern_char_idx == 0 { 220 | bonus_start = current_start_offset + char_offset; 221 | start_offset += char_offset + 1; 222 | current_bonus = 223 | compute_bonus(prev_class, char_class, scheme); 224 | } 225 | 226 | pattern_char_idx += 1; 227 | 228 | if pattern_char_idx == pattern.char_len() { 229 | matched = true; 230 | 231 | if current_bonus as i64 > best_bonus { 232 | best_bonus = current_bonus as _; 233 | 234 | best_bonus_char_start = bonus_start; 235 | 236 | best_bonus_char_end = 237 | current_start_offset + char_offset + 1; 238 | } 239 | 240 | if current_bonus >= bonus::BOUNDARY { 241 | break 'outer; 242 | } 243 | 244 | break; 245 | } 246 | } else if pattern_char_idx > 0 { 247 | break; 248 | } 249 | 250 | prev_class = char_class; 251 | } 252 | 253 | if chars.next().is_none() { 254 | break; 255 | } 256 | } 257 | 258 | if !matched { 259 | return None; 260 | } 261 | 262 | let matched_range = best_bonus_char_start..best_bonus_char_end; 263 | 264 | let score = compute_score::( 265 | pattern, 266 | candidate, 267 
matched_range.clone(), 268 | char_eq, 269 | scheme, 270 | ranges, 271 | ); 272 | 273 | if RANGES { 274 | ranges.insert(candidate.to_byte_range(matched_range)); 275 | } 276 | 277 | Some(score) 278 | } 279 | 280 | /// Matches `pattern` against the start of `candidate`, ignoring the leading spaces the candidate has in excess of the pattern's; returns `None` on a mismatch. An empty pattern scores 0. 281 | #[inline] 282 | fn prefix_match( 283 | pattern: Pattern, 284 | candidate: Candidate, 285 | char_eq: CharEq, 286 | scheme: &Scheme, 287 | ranges: &mut MatchedRanges, 288 | ) -> Option { 289 | if pattern.is_empty() { 290 | return Some(0); 291 | } 292 | 293 | let mut pattern_chars = pattern.chars(); 294 | 295 | let ignored_leading_spaces = 296 | ignored_candidate_leading_spaces(pattern, candidate)?; 297 | 298 | for (candidate_ch, pattern_ch) in candidate 299 | .chars_from(ignored_leading_spaces) 300 | .zip(pattern_chars.by_ref()) 301 | { 302 | if !char_eq(pattern_ch, candidate_ch) { 303 | return None; 304 | } 305 | } 306 | 307 | if pattern_chars.next().is_some() { 308 | return None; 309 | } 310 | 311 | let matched_range = { 312 | let start = ignored_leading_spaces; 313 | let end = start + pattern.char_len(); 314 | start..end 315 | }; 316 | 317 | let score = compute_score::( 318 | pattern, 319 | candidate, 320 | matched_range.clone(), 321 | char_eq, 322 | scheme, 323 | ranges, 324 | ); 325 | 326 | if RANGES { 327 | ranges.insert(candidate.to_byte_range(matched_range)); 328 | } 329 | 330 | Some(score) 331 | } 332 | 333 | /// Matches `pattern` against the end of `candidate`, ignoring the trailing spaces the candidate has in excess of the pattern's; returns `None` on a mismatch. An empty pattern scores 0. 334 | #[inline] 335 | fn suffix_match( 336 | pattern: Pattern, 337 | candidate: Candidate, 338 | char_eq: CharEq, 339 | scheme: &Scheme, 340 | ranges: &mut MatchedRanges, 341 | ) -> Option { 342 | if pattern.is_empty() { 343 | return Some(0); 344 | } 345 | 346 | let mut pattern_chars = pattern.chars().rev(); 347 | 348 | let chars_up_to_ignored_spaces = candidate.char_len() 349 | - ignored_candidate_trailing_spaces(pattern, candidate)?; 350 | 351 | for (candidate_ch, pattern_ch) in candidate 352 | .slice(0..chars_up_to_ignored_spaces) 353 | .chars() 354 | .rev() 355 | .zip(pattern_chars.by_ref()) 356 | { 357 | 
if !char_eq(pattern_ch, candidate_ch) { 358 | return None; 359 | } 360 | } 361 | 362 | if pattern_chars.next().is_some() { 363 | return None; 364 | } 365 | 366 | let matched_range = { 367 | let end = chars_up_to_ignored_spaces; 368 | let start = end - pattern.char_len(); 369 | start..end 370 | }; 371 | 372 | let score = compute_score::( 373 | pattern, 374 | candidate, 375 | matched_range.clone(), 376 | char_eq, 377 | scheme, 378 | ranges, 379 | ); 380 | 381 | if RANGES { 382 | ranges.insert(candidate.to_byte_range(matched_range)); 383 | } 384 | 385 | Some(score) 386 | } 387 | 388 | /// Matches `pattern` against the whole `candidate`, ignoring the leading and trailing spaces the candidate has in excess of the pattern's; returns `None` on a mismatch. An empty pattern scores 0. 389 | #[inline] 390 | fn equal_match( 391 | pattern: Pattern, 392 | candidate: Candidate, 393 | char_eq: CharEq, 394 | scheme: &Scheme, 395 | ranges: &mut MatchedRanges, 396 | ) -> Option { 397 | if pattern.is_empty() { 398 | return Some(0); 399 | } 400 | 401 | let ignored_leading_spaces = 402 | ignored_candidate_leading_spaces(pattern, candidate)?; 403 | 404 | // The candidate contains only spaces. 
405 | if ignored_leading_spaces == candidate.char_len() { 406 | return None; 407 | } 408 | 409 | let ignored_trailing_spaces = 410 | ignored_candidate_trailing_spaces(pattern, candidate)?; 411 | 412 | let matched_char_range = 413 | ignored_leading_spaces..candidate.char_len() - ignored_trailing_spaces; 414 | 415 | if matched_char_range.len() < pattern.char_len() { 416 | return None; 417 | } 418 | 419 | let mut pattern_chars = pattern.chars(); 420 | 421 | let mut candidate_chars = 422 | candidate.slice(matched_char_range.clone()).chars(); 423 | 424 | for (pattern_ch, candidate_ch) in 425 | pattern_chars.by_ref().zip(candidate_chars.by_ref()) 426 | { 427 | if !char_eq(pattern_ch, candidate_ch) { 428 | return None; 429 | } 430 | } 431 | 432 | if pattern_chars.next().is_some() || candidate_chars.next().is_some() { 433 | return None; 434 | } 435 | 436 | let score = compute_score::( 437 | pattern, 438 | candidate, 439 | matched_char_range.clone(), 440 | char_eq, 441 | scheme, 442 | ranges, 443 | ); 444 | 445 | if RANGES { 446 | ranges.insert(candidate.to_byte_range(matched_char_range)); 447 | } 448 | 449 | Some(score) 450 | } 451 | 452 | /// Returns how many more leading spaces `candidate` has than `pattern`, or `None` if `pattern` has more of them. 453 | #[inline(always)] 454 | fn ignored_candidate_leading_spaces( 455 | pattern: Pattern, 456 | candidate: Candidate, 457 | ) -> Option { 458 | let candidate_leading_spaces = candidate.leading_spaces(); 459 | 460 | if pattern.leading_spaces() > candidate_leading_spaces { 461 | None 462 | } else { 463 | Some(candidate_leading_spaces - pattern.leading_spaces()) 464 | } 465 | } 466 | 467 | /// Returns how many more trailing spaces `candidate` has than `pattern`, or `None` if `pattern` has more of them. 468 | #[inline(always)] 469 | fn ignored_candidate_trailing_spaces( 470 | pattern: Pattern, 471 | candidate: Candidate, 472 | ) -> Option { 473 | let candidate_trailing_spaces = candidate.trailing_spaces(); 474 | 475 | if pattern.trailing_spaces() > candidate_trailing_spaces { 476 | None 477 | } else { 478 | Some(candidate_trailing_spaces - pattern.trailing_spaces()) 479 | } 480 | } 481 | 482 | /// Scores `pattern` against the chars of `candidate` in `candidate_char_range`, stopping once the last pattern char has been matched; if `RANGES` is set the byte range of every matched char is inserted into `ranges`. Panics if `pattern` is empty. 483 | #[inline] 
484 | pub(super) fn compute_score( 485 | pattern: Pattern, 486 | candidate: Candidate, 487 | candidate_char_range: Range, 488 | char_eq: CharEq, 489 | scheme: &Scheme, 490 | ranges: &mut MatchedRanges, 491 | ) -> Score { 492 | // Whether the previous candidate char didn't match, in which case the next mismatch extends the gap instead of starting one. 493 | let mut is_in_gap = false; 494 | 495 | // Whether no pattern char has been matched yet; the bonus of the first one is multiplied by `FIRST_QUERY_CHAR_MULTIPLIER`. 496 | let mut is_first_pattern_char = true; 497 | 498 | // The bonus carried over by the current run of consecutive matches. 499 | let mut first_bonus: Score = 0; 500 | 501 | // The number of consecutive matches in the current run. 502 | let mut consecutive = 0u32; 503 | 504 | let byte_range_start = if RANGES { 505 | candidate.to_byte_offset(candidate_char_range.start) 506 | } else { 507 | 0 508 | }; 509 | 510 | let mut byte_offset = 0; 511 | 512 | let mut prev_class = if candidate_char_range.start == 0 { 513 | scheme.initial_char_class 514 | } else { 515 | char_class(candidate.char(candidate_char_range.start - 1), scheme) 516 | }; 517 | 518 | let mut pattern_chars = pattern.chars(); 519 | 520 | let mut pattern_char = pattern_chars.next().expect("pattern is not empty"); 521 | 522 | let mut score: Score = 0; 523 | 524 | for candidate_ch in candidate.slice(candidate_char_range).chars() { 525 | let ch_class = char_class(candidate_ch, scheme); 526 | 527 | if char_eq(pattern_char, candidate_ch) { 528 | score += bonus::MATCH; 529 | 530 | let mut bonus = compute_bonus(prev_class, ch_class, scheme); 531 | 532 | if consecutive == 0 { 533 | first_bonus = bonus; 534 | } else { 535 | if bonus >= bonus::BOUNDARY && bonus > first_bonus { 536 | first_bonus = bonus 537 | } 538 | bonus = bonus.max(first_bonus).max(bonus::CONSECUTIVE); 539 | } 540 | 541 | score += if is_first_pattern_char { 542 | bonus * bonus::FIRST_QUERY_CHAR_MULTIPLIER 543 | } else { 544 | bonus 545 | }; 546 | 547 | if RANGES { 548 | let start = byte_range_start + byte_offset; 549 | let end = start + candidate_ch.len_utf8(); 550 | ranges.insert(start..end); 551 | } 552 | 553 | is_in_gap = false; 554 | 555 | is_first_pattern_char = false; 556 | 557 | consecutive += 1; 558 | 559 | if let Some(next_char) = 
pattern_chars.next() { 560 | pattern_char = next_char; 561 | } else { 562 | break; 563 | }; 564 | } else { 565 | score -= if is_in_gap { 566 | penalty::GAP_EXTENSION 567 | } else { 568 | penalty::GAP_START 569 | }; 570 | 571 | is_in_gap = true; 572 | 573 | consecutive = 0; 574 | 575 | first_bonus = 0; 576 | } 577 | 578 | prev_class = ch_class; 579 | 580 | if RANGES { 581 | byte_offset += candidate_ch.len_utf8(); 582 | } 583 | } 584 | 585 | score 586 | } 587 | 588 | #[cfg(test)] 589 | mod tests { 590 | #![allow(clippy::single_range_in_vec_init)] 591 | 592 | use super::*; 593 | 594 | fn candidate(s: &str) -> Candidate { 595 | assert!(s.is_ascii()); 596 | Candidate::Ascii(s.as_bytes()) 597 | } 598 | 599 | #[test] 600 | fn equal_match_1() { 601 | let pattern = 602 | Pattern::parse("^AbC$".chars().collect::>().leak()) 603 | .unwrap(); 604 | 605 | let mut ranges_buf = Vec::new(); 606 | 607 | assert!(exact_match::( 608 | pattern, 609 | candidate("ABC"), 610 | utils::char_eq(true, false), 611 | &Scheme::default(), 612 | &mut ((&mut ranges_buf).into()) 613 | ) 614 | .is_none()); 615 | 616 | { 617 | ranges_buf.clear(); 618 | 619 | assert!(exact_match::( 620 | pattern, 621 | candidate("AbC"), 622 | utils::char_eq(true, false), 623 | &Scheme::default(), 624 | &mut ((&mut ranges_buf).into()) 625 | ) 626 | .is_some()); 627 | 628 | assert_eq!(ranges_buf.as_slice(), [0..3]); 629 | } 630 | 631 | { 632 | ranges_buf.clear(); 633 | 634 | assert!(exact_match::( 635 | pattern, 636 | candidate("AbC "), 637 | utils::char_eq(true, false), 638 | &Scheme::default(), 639 | &mut ((&mut ranges_buf).into()) 640 | ) 641 | .is_some()); 642 | 643 | assert_eq!(ranges_buf.as_slice(), [0..3]); 644 | } 645 | 646 | { 647 | ranges_buf.clear(); 648 | 649 | assert!(exact_match::( 650 | pattern, 651 | candidate(" AbC "), 652 | utils::char_eq(true, false), 653 | &Scheme::default(), 654 | &mut ((&mut ranges_buf).into()) 655 | ) 656 | .is_some()); 657 | 658 | assert_eq!(ranges_buf.as_slice(), [1..4]); 659 | } 
660 | 661 | { 662 | ranges_buf.clear(); 663 | 664 | assert!(exact_match::( 665 | pattern, 666 | candidate(" AbC"), 667 | utils::char_eq(true, false), 668 | &Scheme::default(), 669 | &mut ((&mut ranges_buf).into()) 670 | ) 671 | .is_some()); 672 | 673 | assert_eq!(ranges_buf.as_slice(), [2..5]); 674 | } 675 | } 676 | 677 | #[test] 678 | fn exact_match_1() { 679 | let pattern = 680 | Pattern::parse("abc".chars().collect::>().leak()).unwrap(); 681 | 682 | let mut ranges_buf = Vec::new(); 683 | 684 | assert!(exact_match::( 685 | pattern, 686 | candidate("aabbcc abc"), 687 | utils::char_eq(true, false), 688 | &Scheme::default(), 689 | &mut ((&mut ranges_buf).into()) 690 | ) 691 | .is_some()); 692 | 693 | assert_eq!(ranges_buf, [7..10]); 694 | } 695 | } 696 | -------------------------------------------------------------------------------- /src/metrics/fzf/fzf_v1.rs: -------------------------------------------------------------------------------- 1 | use core::ops::Range; 2 | 3 | use super::{query::*, *}; 4 | use crate::*; 5 | 6 | /// A metric that implements fzf's v1 algorithm. 7 | /// 8 | /// The [`Metric`] implementation of this struct produces the same results that 9 | /// `fzf` would produce when run with the `--algo=v1` flag. 10 | /// 11 | /// The algorithm used in the [`distance`](Metric::distance) calculation simply 12 | /// looks for the first fuzzy match of the query in the candidate. If a match 13 | /// is found, it traverses backwards from the end of the match to see if 14 | /// there's a shorter substring that also matches the query. 15 | /// 16 | /// By always stopping at the first alignment this metric is able to provide a 17 | /// `O(len(candidate))` time complexity for both matches and non-matches, but 18 | /// it usually produces less accurate results than [`FzfV2`]. 
19 | /// 20 | /// # Example 21 | /// 22 | /// ```rust 23 | /// # use norm::fzf::{FzfV1, FzfParser}; 24 | /// # use norm::Metric; 25 | /// let mut v1 = FzfV1::new(); 26 | /// let mut parser = FzfParser::new(); 27 | /// let mut ranges = Vec::new(); 28 | /// 29 | /// let query = parser.parse("abc"); 30 | /// 31 | /// let candidate = "a_b_abcd"; 32 | /// 33 | /// /* 34 | /// We're looking for "abc" in "a_b_abcd". A first scan will find "a_b_abc": 35 | /// 36 | /// a_b_abcd 37 | /// ******* 38 | /// 39 | /// Now it will stop looking for other (potentially better) alignments of the 40 | /// query, and will instead start going backwards from the end of the match, 41 | /// i.e. from "c". 42 | /// 43 | /// In this case, it will find "abc", which is shorter than "a_b_abc": 44 | /// 45 | /// a_b_abcd 46 | /// *** 47 | /// 48 | /// so the matched range will be "abc". 49 | /// */ 50 | /// 51 | /// let _distance = 52 | /// v1.distance_and_ranges(query, candidate, &mut ranges).unwrap(); 53 | /// 54 | /// assert_eq!(ranges, [4..7]); 55 | /// ``` 56 | #[cfg_attr(docsrs, doc(cfg(feature = "fzf-v1")))] 57 | #[derive(Clone, Default)] 58 | pub struct FzfV1 { 59 | /// The slab used by `alloc_chars` to get the `char`s of a candidate. 60 | candidate_slab: CandidateSlab, 61 | 62 | /// The case sensitivity to use, set by [`Self::set_case_sensitivity`]. 63 | case_sensitivity: CaseSensitivity, 64 | 65 | /// Whether to normalize candidates, set by [`Self::set_candidate_normalization`]. 66 | candidate_normalization: bool, 67 | 68 | /// The scoring scheme to use, set by [`Self::set_scoring_scheme`]. 69 | scoring_scheme: Scheme, 70 | } 71 | 72 | impl core::fmt::Debug for FzfV1 { 73 | #[inline] 74 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 75 | let Some(scoring_scheme) = FzfScheme::from_inner(&self.scoring_scheme) 76 | else { 77 | return Ok(()); 78 | }; 79 | 80 | f.debug_struct("FzfV1") 81 | .field("candidate_normalization", &self.candidate_normalization) 82 | .field("case_sensitivity", &self.case_sensitivity) 83 | .field("scoring_scheme", &scoring_scheme) 84 | .finish_non_exhaustive() 85 | } 86 | } 87 | 88 | impl FzfV1 { 89 | /// Creates a new `FzfV1`. 
90 | /// 91 | /// This will immediately allocate 512 bytes of heap memory, so it's 92 | /// recommended to call this once and reuse the same instance for multiple 93 | /// distance calculations. 94 | #[inline(always)] 95 | pub fn new() -> Self { 96 | Self::default() 97 | } 98 | 99 | /// Returns the current scoring scheme. This is only used for testing. 100 | #[cfg(feature = "__tests")] 101 | pub fn scheme(&self) -> &Scheme { 102 | &self.scoring_scheme 103 | } 104 | 105 | /// Sets whether multi-byte latin characters in the candidate should be 106 | /// normalized to ASCII before comparing them to the query. The default is 107 | /// `false`. 108 | /// 109 | /// # Example 110 | /// 111 | /// ```rust 112 | /// # use norm::fzf::{FzfV1, FzfParser}; 113 | /// # use norm::{Metric, CaseSensitivity}; 114 | /// let mut fzf = FzfV1::new(); 115 | /// let mut parser = FzfParser::new(); 116 | /// 117 | /// // FzfV1 doesn't normalize candidates by default. 118 | /// assert!(fzf.distance(parser.parse("foo"), "ƒöö").is_none()); 119 | /// 120 | /// fzf.set_candidate_normalization(true); 121 | /// 122 | /// // With normalization enabled, we get a match. 123 | /// assert!(fzf.distance(parser.parse("foo"), "ƒöö").is_some()); 124 | /// 125 | /// // Note that normalization is only applied to the candidate, the query 126 | /// // is left untouched. 127 | /// assert!(fzf.distance(parser.parse("ƒöö"), "foo").is_none()); 128 | /// ``` 129 | #[inline(always)] 130 | pub fn set_candidate_normalization( 131 | &mut self, 132 | normalization: bool, 133 | ) -> &mut Self { 134 | self.candidate_normalization = normalization; 135 | self 136 | } 137 | 138 | /// Sets the case sensitivity to use when comparing the characters of the 139 | /// query and the candidate. The default is [`CaseSensitivity::Smart`]. 
140 | /// 141 | /// # Example 142 | /// 143 | /// ```rust 144 | /// # use norm::fzf::{FzfV1, FzfParser}; 145 | /// # use norm::{Metric, CaseSensitivity}; 146 | /// let mut fzf = FzfV1::new(); 147 | /// let mut parser = FzfParser::new(); 148 | /// 149 | /// // FzfV1 uses smart case sensitivity by default. 150 | /// assert!(fzf.distance(parser.parse("abc"), "ABC").is_some()); 151 | /// 152 | /// fzf.set_case_sensitivity(CaseSensitivity::Sensitive); 153 | /// 154 | /// // Now it's case sensitive, so the query won't match the candidate. 155 | /// assert!(fzf.distance(parser.parse("abc"), "ABC").is_none()); 156 | /// ``` 157 | #[inline(always)] 158 | pub fn set_case_sensitivity( 159 | &mut self, 160 | case_sensitivity: CaseSensitivity, 161 | ) -> &mut Self { 162 | self.case_sensitivity = case_sensitivity; 163 | self 164 | } 165 | 166 | /// Sets the scoring scheme to use when calculating the distance between 167 | /// the query and the candidate. The default is [`FzfScheme::Default`]. 168 | /// 169 | /// # Example 170 | /// 171 | /// ```rust 172 | /// # use norm::fzf::{FzfV1, FzfParser, FzfScheme}; 173 | /// # use norm::{Metric}; 174 | /// let mut fzf = FzfV1::new(); 175 | /// let mut parser = FzfParser::new(); 176 | /// 177 | /// let query = parser.parse("foo"); 178 | /// 179 | /// // With the default scoring scheme, "f o o" is considered a better 180 | /// // match than "f/o/o" when searching for "foo". 181 | /// let distance_spaces = fzf.distance(query, "f o o").unwrap(); 182 | /// let distance_path_separator = fzf.distance(query, "f/o/o").unwrap(); 183 | /// assert!(distance_spaces < distance_path_separator); 184 | /// 185 | /// // When searching for a file path we want to use a scoring scheme that 186 | /// // considers "f/o/o" a better match than "f o o". 187 | /// fzf.set_scoring_scheme(FzfScheme::Path); 188 | /// 189 | /// // Now "f/o/o" is considered a better match than "f o o". 
190 | /// let distance_spaces = fzf.distance(query, "f o o").unwrap(); 191 | /// let distance_path_separator = fzf.distance(query, "f/o/o").unwrap(); 192 | /// assert!(distance_path_separator < distance_spaces); 193 | /// ``` 194 | #[inline(always)] 195 | pub fn set_scoring_scheme(&mut self, scheme: FzfScheme) -> &mut Self { 196 | self.scoring_scheme = scheme.into_inner(); 197 | self 198 | } 199 | } 200 | 201 | impl Metric for FzfV1 { 202 | type Query<'a> = FzfQuery<'a>; 203 | 204 | type Distance = FzfDistance; 205 | 206 | #[inline(always)] 207 | fn distance( 208 | &mut self, 209 | query: FzfQuery<'_>, 210 | candidate: &str, 211 | ) -> Option { 212 | let ranges = &mut Vec::new(); 213 | ::distance::(self, query, candidate, ranges) 214 | } 215 | 216 | #[inline] 217 | fn distance_and_ranges( 218 | &mut self, 219 | query: FzfQuery<'_>, 220 | candidate: &str, 221 | ranges: &mut Vec>, 222 | ) -> Option { 223 | ::distance::(self, query, candidate, ranges) 224 | } 225 | } 226 | 227 | impl Fzf for FzfV1 { 228 | #[inline(always)] 229 | fn alloc_chars<'a>(&mut self, s: &str) -> &'a [char] { 230 | unsafe { core::mem::transmute(self.candidate_slab.alloc(s)) } 231 | } 232 | 233 | #[inline(always)] 234 | fn char_eq(&self, pattern: Pattern) -> utils::CharEq { 235 | let is_sensitive = match self.case_sensitivity { 236 | CaseSensitivity::Sensitive => true, 237 | CaseSensitivity::Insensitive => false, 238 | CaseSensitivity::Smart => pattern.has_uppercase, 239 | }; 240 | 241 | utils::char_eq(is_sensitive, self.candidate_normalization) 242 | } 243 | 244 | #[inline(always)] 245 | fn scheme(&self) -> &Scheme { 246 | &self.scoring_scheme 247 | } 248 | 249 | #[inline(always)] 250 | fn fuzzy( 251 | &mut self, 252 | pattern: Pattern, 253 | candidate: Candidate, 254 | ranges: &mut MatchedRanges, 255 | ) -> Option { 256 | let is_sensitive = match self.case_sensitivity { 257 | CaseSensitivity::Sensitive => true, 258 | CaseSensitivity::Insensitive => false, 259 | CaseSensitivity::Smart => 
pattern.has_uppercase, 260 | }; 261 | 262 | let opts = 263 | CandidateOpts::new(is_sensitive, self.candidate_normalization); 264 | 265 | let end_forward = forward_pass(pattern, candidate, opts)?; 266 | 267 | let start_backward = 268 | backward_pass(pattern, candidate, end_forward, opts); 269 | 270 | let score = compute_score::( 271 | pattern, 272 | candidate, 273 | start_backward..end_forward, 274 | opts.char_eq, 275 | &self.scoring_scheme, 276 | ranges, 277 | ); 278 | 279 | Some(score) 280 | } 281 | } 282 | 283 | /// Scans `candidate` for the pattern's chars in order, each searched from right after the previous match, and returns the offset right after the match of the last one; `None` if a char isn't found or the pattern is empty. 284 | #[inline] 285 | fn forward_pass( 286 | pattern: Pattern, 287 | candidate: Candidate, 288 | opts: CandidateOpts, 289 | ) -> Option { 290 | let mut pattern_chars = pattern.chars(); 291 | 292 | let mut pattern_char = pattern_chars.next()?; 293 | 294 | let mut end_offset = 0; 295 | 296 | loop { 297 | end_offset = candidate.find_first_from( 298 | end_offset, 299 | pattern_char, 300 | opts.is_case_sensitive, 301 | opts.char_eq, 302 | )? + 1; 303 | 304 | if let Some(next) = pattern_chars.next() { 305 | pattern_char = next; 306 | } else { 307 | return Some(end_offset); 308 | } 309 | } 310 | } 311 | 312 | /// Looks up the pattern's chars in reverse order via `find_last_from`, starting at `end_offset`, and returns the offset found for the first pattern char; panics if the pattern is empty or a char isn't found. NOTE(review): presumably `find_last_from` searches backwards from the given offset -- confirm. 313 | #[inline] 314 | fn backward_pass( 315 | pattern: Pattern, 316 | candidate: Candidate, 317 | end_offset: usize, 318 | opts: CandidateOpts, 319 | ) -> usize { 320 | let mut pattern_chars = pattern.chars().rev(); 321 | 322 | let mut pattern_char = pattern_chars.next().expect("pattern is not empty"); 323 | 324 | let mut start_offset = end_offset; 325 | 326 | loop { 327 | start_offset = candidate 328 | .find_last_from( 329 | start_offset, 330 | pattern_char, 331 | opts.is_case_sensitive, 332 | opts.char_eq, 333 | ) 334 | .unwrap(); 335 | 336 | if let Some(next) = pattern_chars.next() { 337 | pattern_char = next; 338 | } else { 339 | return start_offset; 340 | } 341 | } 342 | } 343 | -------------------------------------------------------------------------------- /src/metrics/fzf/fzf_v2.rs: 
-------------------------------------------------------------------------------- 1 | use core::ops::Range; 2 | 3 | use super::{query::*, slab::*, *}; 4 | use crate::*; 5 | 6 | /// A metric that implements fzf's v2 algorithm. 7 | /// 8 | /// The [`Metric`] implementation of this struct produces the same results that 9 | /// `fzf` would produce when run with the `--algo=v2` flag. 10 | /// 11 | /// The algorithm used in the [`distance`](Metric::distance) calculation is a 12 | /// modified version of the [Smith-Waterman][sw] algorithm, which was 13 | /// originally designed for finding the best alignment between two DNA or 14 | /// protein sequences. 15 | /// 16 | /// Unlike [`FzfV1`], this metric is able to find the best occurrence of a 17 | /// query within a candidate by considering all possible alignments between the 18 | /// two. For example, given the query `"foo"` and the candidate `"f_o_o_foo"`, 19 | /// `FzfV1` would stop at the first match it finds, i.e. `"f_o_o"`. `FzfV2` on 20 | /// the other hand returns the distance and range of the best alignment 21 | /// according to its scoring criteria, which in this case would be `"foo"`. 22 | /// 23 | /// ```rust 24 | /// # use norm::fzf::{FzfV1, FzfV2, FzfParser}; 25 | /// # use norm::Metric; 26 | /// let mut v1 = FzfV1::new(); 27 | /// let mut v2 = FzfV2::new(); 28 | /// let mut parser = FzfParser::new(); 29 | /// let mut ranges = Vec::new(); 30 | /// 31 | /// let query = parser.parse("foo"); 32 | /// 33 | /// let candidate = "f_o_o_foo"; 34 | /// 35 | /// let distance_v1 = 36 | /// v1.distance_and_ranges(query, candidate, &mut ranges).unwrap(); 37 | /// 38 | /// assert_eq!(ranges, [0..1, 2..3, 4..5]); 39 | /// 40 | /// ranges.clear(); 41 | /// 42 | /// let distance_v2 = 43 | /// v2.distance_and_ranges(query, candidate, &mut ranges).unwrap(); 44 | /// 45 | /// assert_eq!(ranges, [6..9]); 46 | /// 47 | /// // The alignment found by FzfV2 has a lower distance than the one 48 | /// // found by FzfV1. 
49 | /// assert!(distance_v2 < distance_v1); 50 | /// ``` 51 | /// 52 | /// Of course, this increase in accuracy comes at the cost of a higher time 53 | /// complexity for the distance calculation, namely `O(len(query) * 54 | /// len(candidate))` instead of `O(len(candidate))` for `FzfV1`. 55 | /// 56 | /// However, filtering out non-matches is still done in `O(len(candidate))`, so 57 | /// for queries with decent selectivity the performance difference between the 58 | /// two metrics is usually negligible even when dealing with a large number of 59 | /// candidates. 60 | /// 61 | /// 62 | /// [sw]: https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm 63 | #[cfg_attr(docsrs, doc(cfg(feature = "fzf-v2")))] 64 | #[derive(Clone, Default)] 65 | pub struct FzfV2 { 66 | /// The slab used by `alloc_chars` to get the `char`s of a candidate. 67 | candidate_slab: CandidateSlab, 68 | 69 | /// Whether to normalize candidates, set by [`Self::set_candidate_normalization`]. 70 | candidate_normalization: bool, 71 | 72 | /// The case sensitivity to use, set by [`Self::set_case_sensitivity`]. 73 | case_sensitivity: CaseSensitivity, 74 | 75 | /// The scoring scheme to use, set by [`Self::set_scoring_scheme`]. 76 | scoring_scheme: Scheme, 77 | 78 | /// The slab holding the matched indices, bonus and matrix buffers used by `fuzzy`. 79 | slab: V2Slab, 80 | } 81 | 82 | impl core::fmt::Debug for FzfV2 { 83 | #[inline] 84 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 85 | let Some(scoring_scheme) = FzfScheme::from_inner(&self.scoring_scheme) 86 | else { 87 | return Ok(()); 88 | }; 89 | 90 | f.debug_struct("FzfV2") 91 | .field("candidate_normalization", &self.candidate_normalization) 92 | .field("case_sensitivity", &self.case_sensitivity) 93 | .field("scoring_scheme", &scoring_scheme) 94 | .finish_non_exhaustive() 95 | } 96 | } 97 | 98 | impl FzfV2 { 99 | /// Creates a new `FzfV2`. 100 | /// 101 | /// This will immediately allocate around 5kb of heap memory, so it's 102 | /// recommended to call this once and reuse the same instance for multiple 103 | /// distance calculations. 104 | #[inline(always)] 105 | pub fn new() -> Self { 106 | Self::default() 107 | } 108 | 109 | /// Returns the current scoring scheme. This is only used for testing. 
110 | #[cfg(feature = "__tests")] 111 | pub fn scheme(&self) -> &Scheme { 112 | &self.scoring_scheme 113 | } 114 | 115 | /// Sets whether multi-byte latin characters in the candidate should be 116 | /// normalized to ASCII before comparing them to the query. 117 | /// 118 | /// The default is `false`. 119 | /// 120 | /// # Example 121 | /// 122 | /// ```rust 123 | /// # use norm::fzf::{FzfV2, FzfParser}; 124 | /// # use norm::{Metric, CaseSensitivity}; 125 | /// let mut fzf = FzfV2::new(); 126 | /// let mut parser = FzfParser::new(); 127 | /// 128 | /// // FzfV2 doesn't normalize candidates by default. 129 | /// assert!(fzf.distance(parser.parse("foo"), "ƒöö").is_none()); 130 | /// 131 | /// fzf.set_candidate_normalization(true); 132 | /// 133 | /// // With normalization enabled, we get a match. 134 | /// assert!(fzf.distance(parser.parse("foo"), "ƒöö").is_some()); 135 | /// 136 | /// // Note that normalization is only applied to the candidate, the query 137 | /// // is left untouched. 138 | /// assert!(fzf.distance(parser.parse("ƒöö"), "foo").is_none()); 139 | /// ``` 140 | #[inline(always)] 141 | pub fn set_candidate_normalization( 142 | &mut self, 143 | normalization: bool, 144 | ) -> &mut Self { 145 | self.candidate_normalization = normalization; 146 | self 147 | } 148 | 149 | /// Sets the case sensitivity to use when comparing the characters of the 150 | /// query and the candidate. The default is [`CaseSensitivity::Smart`]. 151 | /// 152 | /// # Example 153 | /// 154 | /// ```rust 155 | /// # use norm::fzf::{FzfV2, FzfParser}; 156 | /// # use norm::{Metric, CaseSensitivity}; 157 | /// let mut fzf = FzfV2::new(); 158 | /// let mut parser = FzfParser::new(); 159 | /// 160 | /// // FzfV2 uses smart case sensitivity by default. 
161 | /// assert!(fzf.distance(parser.parse("abc"), "ABC").is_some()); 162 | /// 163 | /// fzf.set_case_sensitivity(CaseSensitivity::Sensitive); 164 | /// 165 | /// // Now it's case sensitive, so the query won't match the candidate. 166 | /// assert!(fzf.distance(parser.parse("abc"), "ABC").is_none()); 167 | /// ``` 168 | #[inline(always)] 169 | pub fn set_case_sensitivity( 170 | &mut self, 171 | case_sensitivity: CaseSensitivity, 172 | ) -> &mut Self { 173 | self.case_sensitivity = case_sensitivity; 174 | self 175 | } 176 | 177 | /// Sets the scoring scheme to use when calculating the distance between 178 | /// the query and the candidate. The default is [`FzfScheme::Default`]. 179 | /// 180 | /// # Example 181 | /// 182 | /// ```rust 183 | /// # use norm::fzf::{FzfV2, FzfParser, FzfScheme}; 184 | /// # use norm::{Metric}; 185 | /// let mut fzf = FzfV2::new(); 186 | /// let mut parser = FzfParser::new(); 187 | /// 188 | /// let query = parser.parse("foo"); 189 | /// 190 | /// // With the default scoring scheme, "f o o" is considered a better 191 | /// // match than "f/o/o" when searching for "foo". 192 | /// let distance_spaces = fzf.distance(query, "f o o").unwrap(); 193 | /// let distance_path_separator = fzf.distance(query, "f/o/o").unwrap(); 194 | /// assert!(distance_spaces < distance_path_separator); 195 | /// 196 | /// // When searching for a file path we want to use a scoring scheme that 197 | /// // considers "f/o/o" a better match than "f o o". 198 | /// fzf.set_scoring_scheme(FzfScheme::Path); 199 | /// 200 | /// // Now "f/o/o" is considered a better match than "f o o". 
/// let distance_spaces = fzf.distance(query, "f o o").unwrap();
    /// let distance_path_separator = fzf.distance(query, "f/o/o").unwrap();
    /// assert!(distance_path_separator < distance_spaces);
    /// ```
    #[inline(always)]
    pub fn set_scoring_scheme(&mut self, scheme: FzfScheme) -> &mut Self {
        self.scoring_scheme = scheme.into_inner();
        self
    }
}

impl Metric for FzfV2 {
    type Query<'a> = FzfQuery<'a>;

    type Distance = FzfDistance;

    /// Computes the distance between `query` and `candidate`.
    ///
    /// Returns `None` when the shared [`Fzf`] implementation reports no
    /// match.
    #[inline(always)]
    fn distance(
        &mut self,
        query: FzfQuery<'_>,
        candidate: &str,
    ) -> Option<Self::Distance> {
        // Throwaway buffer: the shared implementation always takes one.
        let ranges = &mut Vec::new();
        // NOTE(review): the const argument was lost in this listing; `false`
        // (no range tracking) is assumed here -- confirm against upstream.
        <Self as Fzf>::distance::<false>(self, query, candidate, ranges)
    }

    /// Like [`distance`](Metric::distance), but also hands `ranges` to the
    /// shared implementation so that matched byte ranges can be recorded.
    #[inline(always)]
    fn distance_and_ranges(
        &mut self,
        query: FzfQuery<'_>,
        candidate: &str,
        ranges: &mut Vec<core::ops::Range<usize>>,
    ) -> Option<Self::Distance> {
        // NOTE(review): const argument assumed to be `true` -- confirm.
        <Self as Fzf>::distance::<true>(self, query, candidate, ranges)
    }
}

impl Fzf for FzfV2 {
    /// Copies the characters of `s` into the candidate slab and returns them
    /// as a slice.
    #[inline(always)]
    fn alloc_chars<'a>(&mut self, s: &str) -> &'a [char] {
        // NOTE(review): the transmute detaches the returned slice from the
        // borrow of `self.candidate_slab`. This presumably relies on callers
        // dropping the slice before the slab is written to again -- confirm
        // and record the invariant in a `SAFETY` comment.
        unsafe { core::mem::transmute(self.candidate_slab.alloc(s)) }
    }

    /// Returns the character comparison function to use for `pattern`.
    ///
    /// With [`CaseSensitivity::Smart`] the comparison is case sensitive only
    /// if the pattern contains an uppercase character.
    #[inline(always)]
    fn char_eq(&self, pattern: Pattern) -> utils::CharEq {
        let is_sensitive = match self.case_sensitivity {
            CaseSensitivity::Sensitive => true,
            CaseSensitivity::Insensitive => false,
            CaseSensitivity::Smart => pattern.has_uppercase,
        };

        utils::char_eq(is_sensitive, self.candidate_normalization)
    }

    /// Returns the scoring scheme currently in use.
    #[inline(always)]
    fn scheme(&self) -> &Scheme {
        &self.scoring_scheme
    }

    /// Fuzzy-matches `pattern` against `candidate` and returns the best
    /// score, or `None` if some pattern character can't be found in order.
    ///
    /// When `RANGES` is `true` the byte ranges of the matched characters are
    /// inserted into `ranges`.
    #[inline(always)]
    fn fuzzy<const RANGES: bool>(
        &mut self,
        pattern: Pattern,
        candidate: Candidate,
        ranges: &mut MatchedRanges,
    ) -> Option<Score> {
        let is_sensitive = match self.case_sensitivity {
            CaseSensitivity::Sensitive => true,
            CaseSensitivity::Insensitive => false,
            CaseSensitivity::Smart => pattern.has_uppercase,
        };

        let opts =
            CandidateOpts::new(is_sensitive, self.candidate_normalization);

        // Single-character patterns don't need the scoring matrices.
        if pattern.char_len() == 1 {
            return fuzzy_single_char::<RANGES>(
                pattern.char(0),
                candidate,
                opts,
                self.scheme(),
                ranges,
            );
        }

        let (match_offsets, last_match_offset) =
            matches(&mut self.slab.matched_indices, pattern, candidate, opts)?;

        let first_offset = match_offsets[0];

        // Only needed to translate matrix columns back to byte offsets.
        let start_byte_offset =
            if RANGES { candidate.to_byte_offset(first_offset) } else { 0 };

        // Class of the character right before the slice that gets scored.
        let initial_char_class = if first_offset == 0 {
            self.scoring_scheme.initial_char_class
        } else {
            char_class(candidate.char(first_offset - 1), &self.scoring_scheme)
        };

        // Only the window between the first possible match of the first
        // pattern char and the last occurrence of the last one is scored.
        let mut candidate = CandidateV2::new(
            candidate.slice(first_offset..last_match_offset),
            &mut self.slab.bonus,
            initial_char_class,
            opts,
        );

        // After slicing the candidate we move all the offsets back by the
        // first offset.
        match_offsets.iter_mut().for_each(|offset| *offset -= first_offset);

        let (scores, consecutive, score, score_cell) = score(
            &mut self.slab.scoring_matrix,
            &mut self.slab.consecutive_matrix,
            pattern,
            &mut candidate,
            match_offsets,
            &self.scoring_scheme,
        );

        if RANGES {
            matched_ranges(
                scores,
                consecutive,
                score_cell,
                candidate.into_base(),
                start_byte_offset,
                ranges,
            );
        };

        Some(score)
    }
}

/// Finds, for every pattern character, its first occurrence in the candidate
/// that comes after the occurrence found for the previous pattern character.
///
/// Returns the char offsets of those occurrences (one per pattern character)
/// together with the offset one past the *last* occurrence of the final
/// pattern character, or `None` if some pattern character can't be found.
#[inline]
fn matches<'idx>(
    indices_slab: &'idx mut MatchedIndicesSlab,
    pattern: Pattern,
    candidate: Candidate,
    opts: CandidateOpts,
) -> Option<(&'idx mut [usize], usize)> {
    let match_offsets = indices_slab.alloc(pattern.char_len());

    let mut pattern_char_idx = 0;

    let mut last_match_offset = 0;

    loop {
        let pattern_char = pattern.char(pattern_char_idx);

        last_match_offset = candidate.find_first_from(
            last_match_offset,
            pattern_char,
            opts.is_case_sensitive,
            opts.char_eq,
        )?;

        match_offsets[pattern_char_idx] = last_match_offset;

        // The next pattern char has to be found after this one.
        last_match_offset += 1;

        if pattern_char_idx + 1 < pattern.char_len() {
            pattern_char_idx += 1;
        } else {
            break;
        }
    }

    // The last pattern char was found by the loop above, so searching for it
    // from the end can't fail. Despite its name, the `+ 1` makes this an
    // exclusive end offset.
    let last_char_offset_inclusive = candidate
        .find_last(
            pattern.char(pattern_char_idx),
            opts.is_case_sensitive,
            opts.char_eq,
        )
        .unwrap()
        + 1;

    Some((match_offsets, last_char_offset_inclusive))
}

/// Fills the scoring and consecutive-match matrices (one row per pattern
/// character, one column per candidate character) and returns them together
/// with the highest score and the cell holding it.
#[inline]
fn score<'scoring, 'consecutive>(
    scoring_slab: &'scoring mut MatrixSlab<Score>,
    consecutive_slab: &'consecutive mut MatrixSlab<usize>,
    pattern: Pattern,
    candidate: &mut CandidateV2,
    matches: &[usize],
    scheme: &Scheme,
) -> (Matrix<'scoring, Score>,
Matrix<'consecutive, usize>, Score, MatrixCell)
{
    let matrix_width = candidate.char_len();

    let matrix_height = pattern.char_len();

    let mut scoring_matrix = scoring_slab.alloc(matrix_width, matrix_height);

    let mut consecutive_matrix =
        consecutive_slab.alloc(matrix_width, matrix_height);

    let (max_score, max_score_cell) = score_first_row(
        scoring_matrix.row_mut(0),
        consecutive_matrix.row_mut(0),
        pattern.char(0),
        candidate,
        scheme,
    );

    let (max_score, max_score_cell) = score_remaining_rows(
        &mut scoring_matrix,
        &mut consecutive_matrix,
        pattern,
        candidate,
        scheme,
        matches,
        max_score,
        max_score_cell,
    );

    (scoring_matrix, consecutive_matrix, max_score, max_score_cell)
}

/// Fills the first row of both matrices, i.e. the one relative to the first
/// pattern character.
///
/// Matching columns get `bonus::MATCH` plus the positional bonus multiplied
/// by `bonus::FIRST_QUERY_CHAR_MULTIPLIER`; the columns in between decay from
/// the previous score by the gap penalties. Returns the highest score of the
/// row and its cell.
#[inline]
fn score_first_row(
    scores_first_row: &mut Row<Score>,
    consecutives_first_row: &mut Row<usize>,
    first_pattern_char: char,
    candidate: &mut CandidateV2,
    scheme: &Scheme,
) -> (Score, MatrixCell) {
    let mut max_score: Score = 0;

    let mut prev_score: Score = 0;

    let mut max_score_col: usize = 0;

    let mut column = 0;

    let mut penalty = penalty::GAP_START;

    for char_offset in candidate.matches(first_pattern_char) {
        // A new gap starts after every match.
        penalty = penalty::GAP_START;

        // Columns between the previous match and this one.
        for col in column + 1..char_offset {
            let score = prev_score.saturating_sub(penalty);
            scores_first_row[col] = score;
            prev_score = score;
            penalty = penalty::GAP_EXTENSION;
        }

        column = char_offset;

        consecutives_first_row[column] = 1;

        let score = bonus::MATCH
            + (candidate.bonus_at(column, scheme)
                * bonus::FIRST_QUERY_CHAR_MULTIPLIER);

        scores_first_row[column] = score;

        if score > max_score {
            max_score = score;
            max_score_col = column;
        }

        prev_score = score;
    }

    // Columns after the last match.
    for col in column + 1..scores_first_row.len() {
        let score = prev_score.saturating_sub(penalty);
        scores_first_row[col] = score;
        prev_score = score;
        penalty = penalty::GAP_EXTENSION;
    }

    (max_score, MatrixCell(max_score_col))
}

/// Fills every row after the first one and returns the updated highest score
/// and its cell.
///
/// `matches[row]` is the first column at which the row's pattern character
/// can match; the columns before it are never written.
#[inline]
fn score_remaining_rows(
    scores: &mut Matrix<'_, Score>,
    consecutives: &mut Matrix<'_, usize>,
    pattern: Pattern,
    candidate: &mut CandidateV2,
    scheme: &Scheme,
    matches: &[usize],
    mut max_score: Score,
    mut max_score_cell: MatrixCell,
) -> (Score, MatrixCell) {
    let matrix_width = scores.width();

    for row_idx in 1..scores.height() {
        let pattern_char = pattern.char(row_idx);

        let (prev_scores_row, scores_row) =
            scores.two_rows_mut(row_idx - 1, row_idx);

        let (prev_consecutives_row, consecutives_row) =
            consecutives.two_rows_mut(row_idx - 1, row_idx);

        let first_match_offset = matches[row_idx];

        let mut column = first_match_offset;

        let mut penalty = penalty::GAP_START;

        for char_offset in
            candidate.matches_from(first_match_offset, pattern_char)
        {
            penalty = penalty::GAP_START;

            // Columns between the previous match and this one.
            for col in column + 1..char_offset {
                let score_left = scores_row[col - 1];
                let score = score_left.saturating_sub(penalty);
                scores_row[col] = score;
                penalty = penalty::GAP_EXTENSION;
            }

            column = char_offset;

            // Score obtained by not matching here.
            let score_left = scores_row[column - 1].saturating_sub(penalty);

            // Score obtained by matching here, on top of a match of the
            // previous pattern char in the previous column.
            let mut score_up_left = prev_scores_row[column - 1] + bonus::MATCH;

            let mut bonus = candidate.bonus_at(column, scheme);

            let mut consecutive = prev_consecutives_row[column - 1] + 1;

            if consecutive > 1 {
                // Bonus of the first character of the current run.
                let fb = candidate.bonus_at(column + 1 - consecutive, scheme);

                if bonus >= bonus::BOUNDARY && bonus > fb {
                    // This position starts a new run.
                    consecutive = 1;
                } else {
                    bonus = bonus::CONSECUTIVE.max(fb).max(bonus);
                }
            }

            score_up_left += if score_up_left + bonus < score_left {
                // Coming from the left wins: the run is broken and only the
                // plain positional bonus is added.
                consecutive = 0;
                candidate.bonus_at(column, scheme)
            } else {
                bonus
            };

            let score = score_left.max(score_up_left);

            if score > max_score {
                max_score = score;
                max_score_cell = MatrixCell(row_idx * matrix_width + column);
            }

            consecutives_row[column] = consecutive;

            scores_row[column] = score;
        }

        // Columns after the last match.
        for col in column + 1..matrix_width {
            let score_left = scores_row[col - 1];
            let score = score_left.saturating_sub(penalty);
            scores_row[col] = score;
            penalty = penalty::GAP_EXTENSION;
        }
    }

    (max_score, max_score_cell)
}

/// Walks the matrices backwards from `max_score_cell` and inserts into
/// `ranges` the byte range of every candidate character that is part of the
/// best match.
///
/// `start_byte_offset` is added to every range before it is inserted.
#[inline]
fn matched_ranges(
    scores: Matrix<Score>,
    consecutives: Matrix<usize>,
    max_score_cell: MatrixCell,
    candidate: Candidate,
    start_byte_offset: usize,
    ranges: &mut MatchedRanges,
) {
    let mut prefer_match = true;

    let mut cell = max_score_cell;

    let mut col = scores.col_of(max_score_cell);

    let mut row = scores.row_of(max_score_cell);

    loop {
        let is_cell_in_first_col = col == 0;

        let is_cell_in_first_row = row == 0;

        let score_left =
            if is_cell_in_first_col { 0 } else { scores[scores.left(cell)] };

        let score_up_left = if is_cell_in_first_col || is_cell_in_first_row {
            0
        } else {
            scores[scores.up_left(cell)]
        };

        let prefer_this_match = prefer_match;

        prefer_match = consecutives[cell] > 1
            || consecutives
                .get_value(consecutives.down_right(cell))
                .map_or(false, |down_right| down_right > 0);

        let score = scores[cell];

        if score > score_up_left
            && (score > score_left || score == score_left && prefer_this_match)
        {
            // This cell is a match: record it and move diagonally.
            let mut byte_offset = candidate.to_byte_offset(col);

            let ch = candidate.char(col);

            byte_offset += start_byte_offset;

            ranges.insert(byte_offset..byte_offset + ch.len_utf8());

            if is_cell_in_first_row || is_cell_in_first_col {
                break;
            } else {
                row -= 1;
                cell = scores.up_left(cell);
            }
        } else if is_cell_in_first_col {
            break;
        } else {
            cell = scores.left(cell);
        }

        col -= 1;
    }
}

/// Scores a pattern made of a single character.
///
/// Every occurrence of `pattern_char` is scored and the best one wins (the
/// first one on ties). Returns `None` if no occurrence scored above zero.
/// When `RANGES` is `true` the byte range of the winning occurrence is
/// inserted into `ranges`.
#[inline]
fn fuzzy_single_char<const RANGES: bool>(
    pattern_char: char,
    candidate: Candidate,
    opts: CandidateOpts,
    scheme: &Scheme,
    ranges: &mut MatchedRanges,
) -> Option<Score> {
    let mut max_score = 0;

    let mut max_score_pos = 0;

    for char_offset in
        candidate.matches(pattern_char, opts.is_case_sensitive, opts.char_eq)
    {
        let prev_class = if char_offset == 0 {
            scheme.initial_char_class
        } else {
            char_class(candidate.char(char_offset - 1), scheme)
        };

        let this_class = char_class(candidate.char(char_offset), scheme);

        let bonus = compute_bonus(prev_class, this_class, scheme);

        let score = bonus::MATCH + bonus * bonus::FIRST_QUERY_CHAR_MULTIPLIER;

        if score > max_score {
            max_score = score;
            max_score_pos = char_offset;
        }
    }

    if max_score == 0 {
        return None;
    }

    if RANGES {
        let start = candidate.to_byte_offset(max_score_pos);
        let byte_len = candidate.char(max_score_pos).len_utf8();
        ranges.insert(start..start + byte_len);
    }

    Some(max_score)
}

--------------------------------------------------------------------------------
/src/metrics/fzf/mod.rs:
--------------------------------------------------------------------------------
//!
Metrics implementing fzf's algorithms. 2 | //! 3 | //! This module contains two metrics -- [`FzfV1`] and [`FzfV2`] -- which were 4 | //! ported from [fzf], a popular command-line fuzzy-finder. 5 | //! The behavior of both metrics is intended to always match that of the latest 6 | //! release of fzf. Any discrepancy between our implementation and fzf's should 7 | //! be considered a bug. 8 | //! 9 | //! ## Extended-search mode 10 | //! 11 | //! fzf's [extended-search mode][esm] is fully-supported by parsing the query 12 | //! with [`FzfParser::parse`]. 13 | //! 14 | //! In extended-search mode, spaces in the query are treated as logical AND 15 | //! operators, while the pipe character `|` is treated as a logical OR. For 16 | //! example, the query `"foo bar | baz"` would only match candidates that 17 | //! contain `"foo"` and either `"bar"` or `"baz"`. It's also possible to query 18 | //! for candidates that either begin or end with a certain string, to negate a 19 | //! query, and more. 20 | //! 21 | //! To know more about extended-search mode's syntax you can look directly at 22 | //! [fzf's docs][esm] on it, or at the documentation of the [`FzfParser`]. 23 | //! 24 | //! [fzf]: https://github.com/junegunn/fzf 25 | //! [esm]: https://github.com/junegunn/fzf#search-syntax 26 | 27 | mod candidate; 28 | mod distance; 29 | mod fzf; 30 | #[cfg(feature = "fzf-v1")] 31 | mod fzf_v1; 32 | #[cfg(feature = "fzf-v2")] 33 | mod fzf_v2; 34 | mod parser; 35 | mod query; 36 | mod scheme; 37 | mod scoring; 38 | mod slab; 39 | 40 | use candidate::*; 41 | pub use distance::FzfDistance; 42 | use distance::*; 43 | use fzf::*; 44 | #[cfg(feature = "fzf-v1")] 45 | pub use fzf_v1::FzfV1; 46 | #[cfg(feature = "fzf-v2")] 47 | pub use fzf_v2::FzfV2; 48 | pub use parser::*; 49 | pub use query::FzfQuery; 50 | pub use scheme::FzfScheme; 51 | #[doc(hidden)] 52 | pub use scheme::Scheme; 53 | use scoring::*; 54 | use slab::*; 55 | 56 | #[doc(hidden)] 57 | pub mod bonus { 58 | //! 
TODO: docs 59 | 60 | use super::*; 61 | 62 | /// TODO: docs 63 | pub const MATCH: Score = 16; 64 | 65 | /// TODO: docs 66 | pub const BOUNDARY: Score = MATCH / 2; 67 | 68 | /// TODO: docs 69 | pub const NON_WORD: Score = MATCH / 2; 70 | 71 | /// TODO: docs 72 | pub const CAMEL_123: Score = BOUNDARY - penalty::GAP_EXTENSION; 73 | 74 | /// TODO: docs 75 | pub const CONSECUTIVE: Score = penalty::GAP_START + penalty::GAP_EXTENSION; 76 | 77 | /// TODO: docs 78 | pub const FIRST_QUERY_CHAR_MULTIPLIER: Score = 2; 79 | } 80 | 81 | #[doc(hidden)] 82 | pub mod penalty { 83 | //! TODO: docs 84 | 85 | use super::*; 86 | 87 | /// TODO: docs 88 | pub const GAP_START: Score = 3; 89 | 90 | /// TODO: docs 91 | pub const GAP_EXTENSION: Score = 1; 92 | } 93 | -------------------------------------------------------------------------------- /src/metrics/fzf/parser.rs: -------------------------------------------------------------------------------- 1 | use core::mem::transmute; 2 | 3 | use super::query::{Condition, FzfQuery, Pattern}; 4 | use crate::utils; 5 | 6 | /// The parser used to parse strings into [`FzfQuery`]s. 7 | /// 8 | /// Queries can be parsed according to fzf's [extended-search mode][esm] via 9 | /// [`parse`][FzfParser::parse]. If this is not desired, use 10 | /// [`parse_not_extended`][FzfParser::parse_not_extended] instead. 
11 | /// 12 | /// [esm]: https://github.com/junegunn/fzf#search-syntax 13 | #[derive(Clone)] 14 | pub struct FzfParser { 15 | /// TODO: docs 16 | chars: Vec, 17 | 18 | /// TODO: docs 19 | patterns: Vec>, 20 | 21 | /// TODO: docs 22 | conditions: Vec>, 23 | } 24 | 25 | impl Default for FzfParser { 26 | #[inline] 27 | fn default() -> Self { 28 | Self { 29 | chars: vec![char::default(); 64], 30 | patterns: vec![Pattern::default(); 64], 31 | conditions: vec![Condition::default(); 64], 32 | } 33 | } 34 | } 35 | 36 | impl core::fmt::Debug for FzfParser { 37 | #[inline] 38 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 39 | f.debug_struct("FzfParser").finish_non_exhaustive() 40 | } 41 | } 42 | 43 | impl FzfParser { 44 | /// Creates a new `FzfParser`. 45 | #[inline] 46 | pub fn new() -> Self { 47 | Self::default() 48 | } 49 | 50 | /// Parses the given query string according to fzf's 51 | /// [extended-search mode][esm]. 52 | /// 53 | /// In extended-search mode certain characters change how the query is 54 | /// matched in candidates. 55 | /// 56 | /// In particular: 57 | /// 58 | /// | Pattern | Matches | 59 | /// | ------- | -------------------------------------------- | 60 | /// | `foo` | candidates that fuzzy-match `"foo"` | 61 | /// | `'foo` | candidates that include `"foo"` | 62 | /// | `^foo` | candidates that start with `"foo"` | 63 | /// | `foo$` | candidates that end with `"foo"` | 64 | /// | `!foo` | candidates that **don't** include `"foo"` | 65 | /// | `!^foo` | candidates that **don't** start with `"foo"` | 66 | /// | `!foo$` | candidates that **don't** end with `"foo"` | 67 | /// 68 | /// It's also possible to query for multiple patterns by separating them 69 | /// with spaces or with the pipe character `"|"`. A space acts as a logical 70 | /// AND operator, while a pipe character acts as a logical OR operator. 
71 | /// 72 | /// For example, the query `"^main .c$ | .rs$"` would only match candidates 73 | /// that start with `"main"` and end with either `".c"` or `".rs"`. 74 | /// Spaces can be escaped with a backslash if they're part of a pattern, 75 | /// e.g. `"foo\ baz"` will match `"foo bar baz"` but not `"baz foo"`. 76 | /// 77 | /// Note that like in fzf, but unlike in logical expressions, the pipe 78 | /// character (OR) has a higher precedence than the space character (AND), 79 | /// so that `"foo bar | baz"` gets parsed as `"foo && (bar || baz)"`, and 80 | /// **not** as `"(foo && bar) || baz"`; 81 | /// 82 | /// If you want to treat all the characters in the query as fuzzy-matching, 83 | /// use [`parse_not_extended`][FzfParser::parse_not_extended] instead. 84 | /// 85 | /// [esm]: https://github.com/junegunn/fzf#search-syntax 86 | #[inline] 87 | pub fn parse<'a>(&'a mut self, query: &str) -> FzfQuery<'a> { 88 | let max_chars = query.len(); 89 | 90 | if self.chars.len() < max_chars { 91 | self.chars.resize(max_chars, char::default()); 92 | } 93 | 94 | // The theoretical maximum number of conditions that could be included 95 | // in the query. 96 | // 97 | // The actual number of conditions (which we'll only know after 98 | // parsing) matches this maximum on space-separated queries of 99 | // multiple ascii characters, e.g. `a b c d`. 100 | let max_conditions = query.len() / 2 + 1; 101 | 102 | if self.conditions.len() < max_conditions { 103 | self.conditions.resize(max_conditions, Condition::default()); 104 | } 105 | 106 | if self.patterns.len() < max_conditions { 107 | self.patterns.resize(max_conditions, Pattern::default()); 108 | } 109 | 110 | let patterns: &'a mut [Pattern<'static>] = 111 | self.patterns.as_mut_slice(); 112 | 113 | // SAFETY: todo. 
114 | let patterns = unsafe { 115 | transmute::<&'a mut [Pattern<'static>], &'a mut [Pattern<'a>]>( 116 | patterns, 117 | ) 118 | }; 119 | 120 | let mut num_conditions = 0; 121 | 122 | for condition in 123 | Patterns::new(patterns, &mut self.chars, query).map(Condition::new) 124 | { 125 | // SAFETY: todo 126 | let condition = unsafe { 127 | transmute::>(condition) 128 | }; 129 | 130 | self.conditions[num_conditions] = condition; 131 | 132 | num_conditions += 1; 133 | } 134 | 135 | FzfQuery::new_extended(&self.conditions[..num_conditions]) 136 | } 137 | 138 | /// Parses the given query string without using fzf's extended-search mode. 139 | /// 140 | /// All the characters in the query string are used for fuzzy-matching, 141 | /// with no special meaning attached to any of them. This is equivalent to 142 | /// calling `fzf` with the `--no-extended` flag. 143 | /// 144 | /// If you want to apply fzf's extended-search mode to the query, parse it 145 | /// with [`parse`][FzfParser::parse] instead. 146 | /// 147 | /// # Examples 148 | /// 149 | /// ```rust 150 | /// # use norm::fzf::{FzfParser, FzfV2}; 151 | /// # use norm::Metric; 152 | /// let mut fzf = FzfV2::new(); 153 | /// let mut parser = FzfParser::new(); 154 | /// let mut ranges = Vec::new(); 155 | /// 156 | /// let query = parser.parse_not_extended("^bar | baz$"); 157 | /// 158 | /// let distance = 159 | /// fzf.distance_and_ranges(query, "^foo bar | baz $ foo", &mut ranges); 160 | /// 161 | /// // We expect a match since the characters in the query fuzzy-match the 162 | /// // candidate. 163 | /// // 164 | /// // If we parsed the query by calling `parse` there wouldn't have been a 165 | /// // match since the candidate doesn't start with `"bar"` nor does it end 166 | /// // with `"baz"`. 
167 | /// assert!(distance.is_some()); 168 | /// 169 | /// assert_eq!(ranges, [0..1, 5..14, 15..16]); 170 | /// ``` 171 | #[inline] 172 | pub fn parse_not_extended<'a>(&'a mut self, query: &str) -> FzfQuery<'a> { 173 | let mut char_len = 0; 174 | 175 | for ch in query.chars() { 176 | self.chars[char_len] = ch; 177 | char_len += 1; 178 | } 179 | 180 | FzfQuery::new_not_extended(&self.chars[..char_len]) 181 | } 182 | } 183 | 184 | const OR_BLOCK_SEPARATOR: &[char] = &['|']; 185 | 186 | /// TODO: docs 187 | struct Patterns<'buf, 's> { 188 | /// TODO: docs 189 | buf: &'buf mut [Pattern<'buf>], 190 | 191 | /// TODO: docs 192 | allocated: usize, 193 | 194 | /// TODO: docs 195 | words: Words<'buf, 's>, 196 | 197 | /// TODO: docs 198 | next: Option>, 199 | } 200 | 201 | impl<'buf, 's> Patterns<'buf, 's> { 202 | #[inline] 203 | fn alloc(&mut self, pattern: Pattern<'buf>) { 204 | self.buf[self.allocated] = pattern; 205 | self.allocated += 1; 206 | } 207 | 208 | #[inline] 209 | fn new( 210 | patterns_buf: &'buf mut [Pattern<'buf>], 211 | char_buf: &'buf mut [char], 212 | s: &'s str, 213 | ) -> Self { 214 | Self { 215 | buf: patterns_buf, 216 | allocated: 0, 217 | words: Words::new(char_buf, s), 218 | next: None, 219 | } 220 | } 221 | } 222 | 223 | impl<'buf, 's> Iterator for Patterns<'buf, 's> { 224 | type Item = &'buf [Pattern<'buf>]; 225 | 226 | #[inline] 227 | fn next(&mut self) -> Option { 228 | let prev_allocated = self.allocated; 229 | 230 | // Whether we're expecting the next word yielded by `self.words` to be 231 | // a "|". This is set to true after getting a word, and set to false 232 | // after a "|". 
233 | let mut looking_for_or; 234 | 235 | if let Some(first_pattern) = self.next.take() { 236 | self.alloc(first_pattern); 237 | looking_for_or = true; 238 | } else { 239 | looking_for_or = false; 240 | } 241 | 242 | loop { 243 | let Some(word) = self.words.next() else { 244 | break; 245 | }; 246 | 247 | let word_is_condition = word != OR_BLOCK_SEPARATOR; 248 | 249 | if word_is_condition { 250 | let Some(word) = Pattern::parse(word) else { continue }; 251 | 252 | if looking_for_or { 253 | self.next = Some(word); 254 | break; 255 | } else { 256 | self.alloc(word); 257 | looking_for_or = true; 258 | continue; 259 | } 260 | } 261 | 262 | looking_for_or = false; 263 | } 264 | 265 | if self.allocated == prev_allocated { 266 | return None; 267 | } 268 | 269 | let patterns = &self.buf[prev_allocated..self.allocated]; 270 | 271 | // SAFETY: todo 272 | let patterns = 273 | unsafe { transmute::<&[Pattern], &'buf [Pattern]>(patterns) }; 274 | 275 | Some(patterns) 276 | } 277 | } 278 | 279 | /// An iterator over the words of a string. 280 | /// 281 | /// Here, a "word" is simply a string of consecutive non-ascii-space 282 | /// characters. Escaped spaces are treated as non-space characters. 
///
/// # Examples
///
/// ```rust
/// # use norm::fzf::words;
/// let mut words = words("foo 'bar' \"baz\"");
/// assert_eq!(words.next().as_deref(), Some("foo"));
/// assert_eq!(words.next().as_deref(), Some("'bar'"));
/// assert_eq!(words.next().as_deref(), Some("\"baz\""));
/// assert_eq!(words.next(), None);
/// ```
///
/// ```rust
/// # use norm::fzf::words;
/// let mut words = words("foo\\ bar baz");
/// assert_eq!(words.next().as_deref(), Some("foo bar"));
/// assert_eq!(words.next().as_deref(), Some("baz"));
/// assert_eq!(words.next(), None);
/// ```
///
/// ```rust
/// # use norm::fzf::words;
/// let mut words = words("foo \\ bar");
/// assert_eq!(words.next().as_deref(), Some("foo"));
/// assert_eq!(words.next().as_deref(), Some(" bar"));
/// assert_eq!(words.next(), None);
/// ```
#[doc(hidden)]
pub struct Words<'buf, 'sentence> {
    /// Buffer the characters of the words are written to.
    buf: &'buf mut [char],

    /// Number of slots of `buf` used so far.
    allocated: usize,

    /// The part of the sentence that still has to be split into words.
    s: &'sentence str,
}

impl<'buf, 'sentence> Words<'buf, 'sentence> {
    /// Appends the characters of `s` to the buffer.
    #[inline]
    fn alloc(&mut self, s: &str) {
        for ch in s.chars() {
            self.buf[self.allocated] = ch;
            self.allocated += 1;
        }
    }

    /// Creates an iterator over the words of `s` that stores their
    /// characters in `buf`. Leading spaces of `s` are stripped.
    #[inline]
    fn new(buf: &'buf mut [char], s: &'sentence str) -> Self {
        Self { buf, s: utils::strip_leading_spaces(s), allocated: 0 }
    }
}

impl<'buf> Iterator for Words<'buf, '_> {
    type Item = &'buf [char];

    /// Returns the characters of the next word, with escaped spaces
    /// unescaped, or `None` once the sentence is exhausted.
    #[inline(always)]
    fn next(&mut self) -> Option<Self::Item> {
        if self.s.is_empty() {
            return None;
        }

        let prev_allocated = self.allocated;

        // Number of bytes of `self.s` consumed by this word.
        let mut word_byte_end = 0;

        let mut s = self.s;

        loop {
            match memchr::memchr(b' ', s.as_bytes()) {
                // An unescaped space right after an escaped one ends the
                // word.
                Some(0) => break,

                Some(offset) if s.as_bytes()[offset - 1] == b'\\' => {
                    // Push everything up to (but not including) the escape.
                    self.alloc(&s[..offset - 1]);

                    // ..skip the escape..

                    // ..and push the space.
                    self.alloc(" ");

                    s = &s[offset + 1..];

                    word_byte_end += offset + 1;
                },

                Some(offset) => {
                    let s = &s[..offset];
                    self.alloc(s);
                    word_byte_end += s.len();
                    break;
                },

                None => {
                    self.alloc(s);
                    word_byte_end += s.len();
                    break;
                },
            }
        }

        self.s = utils::strip_leading_spaces(&self.s[word_byte_end..]);

        let word = &self.buf[prev_allocated..self.allocated];

        // SAFETY: todo
        // NOTE(review): extends the borrow of `self.buf` to `'buf`.
        // Presumably sound because later calls only write to slots past
        // `self.allocated` -- confirm.
        let word = unsafe { transmute::<&[char], &'buf [char]>(word) };

        Some(word)
    }
}

/// Test helper that parses `s` in extended-search mode using a parser that
/// is leaked to obtain a `'static` query.
#[cfg(feature = "__tests")]
#[doc(hidden)]
pub fn parse(s: &str) -> FzfQuery<'static> {
    let parser = Box::leak(Box::new(FzfParser::new()));
    parser.parse(s)
}

#[cfg(test)]
mod parse_tests {
    use super::super::query::*;
    use super::*;

    #[test]
    fn parse_query_empty() {
        assert!(parse("").is_empty());
    }

    #[test]
    fn parse_query_single_fuzzy() {
        let query = parse("foo");

        let SearchMode::NotExtended(pattern) = query.search_mode else {
            panic!();
        };

        assert_eq!(pattern.into_string(), "foo");

        assert_eq!(pattern.match_type, MatchType::Fuzzy);
    }

    #[test]
    fn parse_query_upstream_extended() {
        let query = parse(
            "aaa 'bbb ^ccc ddd$ !eee !'fff !^ggg !hhh$ | ^iii$ ^xxx | 'yyy | \
             zzz$ | !ZZZ |",
        );

        let SearchMode::Extended(conditions) = query.search_mode else {
            panic!();
        };

        assert_eq!(conditions.len(),
9);

        let pattern = conditions[0].or_patterns()[0];
        assert_eq!(pattern.match_type, MatchType::Fuzzy);
        assert!(!pattern.is_inverse);

        let pattern = conditions[1].or_patterns()[0];
        assert_eq!(pattern.match_type, MatchType::Exact);
        assert!(!pattern.is_inverse);

        let pattern = conditions[2].or_patterns()[0];
        assert_eq!(pattern.match_type, MatchType::PrefixExact);
        assert!(!pattern.is_inverse);

        let pattern = conditions[3].or_patterns()[0];
        assert_eq!(pattern.match_type, MatchType::SuffixExact);
        assert!(!pattern.is_inverse);

        let pattern = conditions[4].or_patterns()[0];
        assert_eq!(pattern.match_type, MatchType::Exact);
        assert!(pattern.is_inverse);

        let pattern = conditions[5].or_patterns()[0];
        assert_eq!(pattern.match_type, MatchType::Fuzzy);
        assert!(pattern.is_inverse);

        let pattern = conditions[6].or_patterns()[0];
        assert_eq!(pattern.match_type, MatchType::PrefixExact);
        assert!(pattern.is_inverse);

        let pattern = conditions[7].or_patterns()[0];
        assert_eq!(pattern.match_type, MatchType::SuffixExact);
        assert!(pattern.is_inverse);

        let pattern = conditions[7].or_patterns()[1];
        assert_eq!(pattern.match_type, MatchType::EqualExact);
        assert!(!pattern.is_inverse);

        let pattern = conditions[8].or_patterns()[0];
        assert_eq!(pattern.match_type, MatchType::PrefixExact);
        assert!(!pattern.is_inverse);

        let pattern = conditions[8].or_patterns()[1];
        assert_eq!(pattern.match_type, MatchType::Exact);
        assert!(!pattern.is_inverse);

        let pattern = conditions[8].or_patterns()[2];
        assert_eq!(pattern.match_type, MatchType::SuffixExact);
        assert!(!pattern.is_inverse);

        let pattern = conditions[8].or_patterns()[3];
        assert_eq!(pattern.match_type, MatchType::Exact);
        assert!(pattern.is_inverse);
    }
}

#[cfg(test)]
mod patterns_tests {
    use super::*;

    /// Returns the OR-blocks of `s`, backed by leaked buffers sized like the
    /// ones `FzfParser::parse` uses.
    // NOTE(review): the iterator's item type was lost in this listing and
    // has been reconstructed -- confirm against upstream.
    fn patterns(
        s: &str,
    ) -> impl Iterator<Item = &'static [Pattern<'static>]> + '_ {
        let patterns_buf = vec![Pattern::default(); s.len() / 2 + 1].leak();
        let char_buf = vec![char::default(); s.len()].leak();
        Patterns::new(patterns_buf, char_buf, s)
    }

    /// Parses `s` as a single pattern backed by a leaked buffer.
    fn pattern(s: &str) -> Pattern<'static> {
        Pattern::parse(s.chars().collect::<Vec<_>>().leak()).unwrap()
    }

    #[test]
    fn patterns_empty() {
        let mut blocks = patterns("");
        assert!(blocks.next().is_none());
    }

    #[test]
    fn patterns_single() {
        let mut blocks = patterns("foo");
        assert_eq!(blocks.next().unwrap(), [pattern("foo")]);
        assert_eq!(blocks.next(), None);
    }

    #[test]
    fn patterns_multiple_ors() {
        let mut blocks = patterns("foo | bar | baz");
        assert_eq!(
            blocks.next().unwrap(),
            [pattern("foo"), pattern("bar"), pattern("baz")]
        );
        assert_eq!(blocks.next(), None);
    }

    #[test]
    fn patterns_multiple_ands() {
        let mut blocks = patterns("foo bar baz");
        assert_eq!(blocks.next().unwrap(), [pattern("foo")]);
        assert_eq!(blocks.next().unwrap(), [pattern("bar")]);
        assert_eq!(blocks.next().unwrap(), [pattern("baz")]);
        assert_eq!(blocks.next(), None);
    }

    #[test]
    fn patterns_empty_between_ors() {
        let mut blocks = patterns("foo | | bar");
        assert_eq!(blocks.next().unwrap(), [pattern("foo"), pattern("bar")]);
        assert_eq!(blocks.next(), None);
    }

    #[test]
    fn patterns_multiple_ors_multiple_ands() {
        let mut blocks = patterns("foo | bar baz qux | quux | corge");
        assert_eq!(blocks.next().unwrap(), [pattern("foo"), pattern("bar")]);
        assert_eq!(blocks.next().unwrap(), [pattern("baz")]);
        assert_eq!(
            blocks.next().unwrap(),
            [pattern("qux"), pattern("quux"),
pattern("corge")]
        );
        assert_eq!(blocks.next(), None);
    }
}

/// Test helper that splits `s` into words and returns them as owned strings.
// NOTE(review): the iterator's item type was lost in this listing and has
// been reconstructed from the `String::from_iter` call -- confirm.
#[cfg(feature = "__tests")]
#[doc(hidden)]
pub fn words(s: &str) -> impl Iterator<Item = String> {
    let mut buf = Vec::new();

    // The byte length is an upper bound on the number of characters.
    buf.resize(s.len(), char::default());

    Words::new(&mut buf, s)
        .map(String::from_iter)
        .collect::<Vec<_>>()
        .into_iter()
}

#[cfg(test)]
mod word_tests {
    use super::*;

    // NOTE(review): runs of spaces inside the string literals below look
    // collapsed in this listing (see the two flagged tests) -- compare with
    // upstream before relying on them.

    #[test]
    fn words_empty() {
        let mut words = words("");
        assert!(words.next().is_none());
    }

    #[test]
    fn words_single() {
        let mut words = words("foo");
        assert_eq!(words.next().as_deref(), Some("foo"));
        assert_eq!(words.next(), None);
    }

    #[test]
    fn words_escaped_escape_escaped_space() {
        let mut words = words("\\\\ ");
        assert_eq!(words.next().as_deref(), Some("\\ "));
        assert_eq!(words.next(), None);
    }

    #[test]
    fn words_multiple() {
        let mut words = words("foo bar");
        assert_eq!(words.next().as_deref(), Some("foo"));
        assert_eq!(words.next().as_deref(), Some("bar"));
        assert_eq!(words.next(), None);
    }

    #[test]
    fn words_multiple_leading_trailing_spaces() {
        let mut words = words(" foo bar ");
        assert_eq!(words.next().as_deref(), Some("foo"));
        assert_eq!(words.next().as_deref(), Some("bar"));
        assert_eq!(words.next(), None);
    }

    #[test]
    fn words_multiple_escaped_spaces() {
        let mut words = words("foo\\ bar\\ baz");
        assert_eq!(words.next().as_deref(), Some("foo bar baz"));
        assert_eq!(words.next(), None);
    }

    // NOTE(review): likely affected by collapsed whitespace.
    #[test]
    fn words_multiple_standalone_escaped_spaces() {
        let mut words = words(" \\ foo \\ bar \\ ");
        assert_eq!(words.next().as_deref(), Some(" "));
        assert_eq!(words.next().as_deref(), Some("foo"));
        assert_eq!(words.next().as_deref(), Some(" bar"));
        assert_eq!(words.next().as_deref(), Some(" "));
        assert_eq!(words.next(), None);
    }

    #[test]
    fn words_single_escaped_spaces() {
        let mut words = words("\\ ");
        assert_eq!(words.next().as_deref(), Some(" "));
        assert_eq!(words.next(), None);
    }

    // NOTE(review): likely affected by collapsed whitespace.
    #[test]
    fn words_consecutive_escaped_spaces() {
        let mut words = words(" \\ \\ \\ ");
        assert_eq!(words.next().as_deref(), Some(" "));
        assert_eq!(words.next(), None);
    }
}

--------------------------------------------------------------------------------
/src/metrics/fzf/query.rs:
--------------------------------------------------------------------------------
use core::fmt::Write;

/// A parsed fzf query.
///
/// This struct is created by parsing a query string via the
/// [`FzfParser`](super::FzfParser). See its documentation for more.
#[derive(Clone, Copy)]
pub struct FzfQuery<'a> {
    pub(super) search_mode: SearchMode<'a>,
}

/// How a query is matched against candidates.
#[derive(Clone, Copy)]
pub(super) enum SearchMode<'a> {
    /// Extended-search mode: a list of conditions joined by a logical AND
    /// (this is how the `Debug` impl of [`FzfQuery`] prints them).
    Extended(&'a [Condition<'a>]),

    /// A single pattern.
    NotExtended(Pattern<'a>),
}

impl core::fmt::Debug for FzfQuery<'_> {
    /// Prints the conditions joined by `" && "`, or the text of the single
    /// pattern.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let s = match self.search_mode {
            SearchMode::Extended(conditions) => conditions
                .iter()
                .map(|condition| format!("{:?}", condition))
                .collect::<Vec<_>>()
                .join(" && "),

            SearchMode::NotExtended(pattern) => pattern.into_string(),
        };

        f.debug_tuple("FzfQuery").field(&s).finish()
    }
}

impl<'a> FzfQuery<'a> {
    /// Returns whether the query has no conditions (extended mode) or an
    /// empty pattern (non-extended mode).
    #[inline]
    pub(super) fn is_empty(&self) -> bool {
        match self.search_mode {
            SearchMode::Extended(conditions) => conditions.is_empty(),
            SearchMode::NotExtended(pattern) => pattern.is_empty(),
        }
    }

// (methods of `impl<'a> FzfQuery<'a>`, continued)

    /// Builds a query from the conditions parsed out of an extended-mode
    /// query string.
    ///
    /// When `conditions` holds exactly one condition made of exactly one
    /// pattern, and that pattern's match type is [`MatchType::Fuzzy`], the
    /// query is stored as [`SearchMode::NotExtended`] wrapping that pattern.
    /// In every other case it is stored as [`SearchMode::Extended`].
    ///
    /// # Panics
    ///
    /// Panics if `conditions` has a single element and that condition holds
    /// no patterns.
    #[inline]
    pub(super) fn new_extended(conditions: &'a [Condition<'a>]) -> Self {
        // If there's only one condition with a single pattern, and that
        // pattern is fuzzy, then we can use the non-extended search mode.
        //
        // NOTE(review): `Pattern::parse` turns `!'foo` into an inverse
        // pattern whose match type is `Fuzzy`, so it takes this shortcut
        // too. Confirm the non-extended path honours `is_inverse`.
        if conditions.len() == 1 {
            let mut patterns = conditions[0].iter();

            let first_pattern = patterns
                .next()
                .expect("conditions always have at least one pattern");

            if patterns.next().is_none()
                && matches!(first_pattern.match_type, MatchType::Fuzzy)
            {
                return Self {
                    search_mode: SearchMode::NotExtended(first_pattern),
                };
            }
        }

        Self { search_mode: SearchMode::Extended(conditions) }
    }

    /// Builds a non-extended query: `chars` is wrapped verbatim in a single
    /// fuzzy, non-inverse pattern (see [`Pattern::raw`]) without interpreting
    /// any operator characters.
    #[inline]
    pub(super) fn new_not_extended(chars: &'a [char]) -> Self {
        Self { search_mode: SearchMode::NotExtended(Pattern::raw(chars)) }
    }
}

/// A group of patterns that belong together in an extended query.
///
/// NOTE(review): judging by the field name and by the ` || ` separator used
/// when debug-printing, a condition is satisfied when any one of its patterns
/// is. Confirm against the matcher.
#[derive(Default, Clone, Copy)]
pub(super) struct Condition<'a> {
    /// The alternative patterns making up this condition.
    pub(super) or_patterns: &'a [Pattern<'a>],
}

/// Prints nothing for an empty condition, the bare pattern for a single
/// pattern, and `(a || b || ...)` for several patterns.
impl core::fmt::Debug for Condition<'_> {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self.or_patterns {
            [] => Ok(()),

            [pattern] => pattern.into_string().fmt(f),

            _ => {
                f.write_char('(')?;

                let len = self.or_patterns.len();

                for (idx, pattern) in self.iter().enumerate() {
                    let is_last = idx + 1 == len;

                    pattern.into_string().fmt(f)?;

                    // The separator goes between patterns only.
                    if !is_last {
                        f.write_str(" || ")?;
                    }
                }

                f.write_char(')')
            },
        }
    }
}

impl<'a> Condition<'a> {
    /// Returns the underlying pattern slice (test builds only).
    #[cfg(test)]
    pub(super) fn or_patterns(&self) -> &'a [Pattern<'a>] {
        self.or_patterns
    }

    /// Iterates over the patterns of this condition, yielding them by value
    /// (`Pattern` is `Copy`).
    #[inline]
    pub(super) fn iter(
        &self,
    ) -> impl ExactSizeIterator<Item = Pattern<'a>> + '_ {
        self.or_patterns.iter().copied()
    }

    /// Wraps `or_patterns` in a condition without copying it.
    #[inline]
    pub(super) fn new(or_patterns: &'a [Pattern<'a>]) -> Self {
        Self { or_patterns }
    }
}

/// A single search term together with the modifiers that were parsed off it.
#[derive(Default, Clone, Copy)]
#[cfg_attr(test, derive(PartialEq))]
pub(super) struct Pattern<'a> {
    /// The characters to search for, with any operator characters consumed
    /// by [`Pattern::parse`] already removed.
    text: &'a [char],

    /// Whether any of the characters in [`Self::text`] are uppercase.
    pub(super) has_uppercase: bool,

    /// How [`Self::text`] has to be matched.
    pub(super) match_type: MatchType,

    /// Set when the pattern was introduced by a leading `!`.
    pub(super) is_inverse: bool,

    /// Number of `' '` characters at the start of [`Self::text`].
    pub(super) leading_spaces: usize,

    /// Number of `' '` characters at the end of [`Self::text`].
    pub(super) trailing_spaces: usize,
}

impl core::fmt::Debug for Pattern<'_> {
    #[inline]
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        self.into_string().fmt(f)
    }
}

impl<'a> Pattern<'a> {
    /// Returns the number of characters in the pattern's text.
    #[inline(always)]
    pub(super) fn char_len(&self) -> usize {
        self.text.len()
    }

    /// Returns the character at position `idx`.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is out of bounds.
    #[inline(always)]
    pub(super) fn char(&self, idx: usize) -> char {
        self.text[idx]
    }

    /// Iterates over the characters of the pattern's text, from either end.
    #[inline]
    pub(crate) fn chars(&self) -> impl DoubleEndedIterator<Item = char> + '_ {
        self.text.iter().copied()
    }

    /// Returns `true` if the pattern's text has no characters.
    #[inline]
    pub(super) fn is_empty(&self) -> bool {
        self.text.is_empty()
    }

    /// Collects the pattern's text into an owned `String`.
    #[inline]
    pub(super) fn into_string(self) -> String {
        self.text.iter().collect::<String>()
    }

    /// Returns the number of leading `' '` characters in the text.
    #[inline(always)]
    pub(super) fn leading_spaces(&self) -> usize {
        self.leading_spaces
    }

    /// Wraps `text` as-is in a fuzzy, non-inverse pattern. No operator
    /// character is interpreted or removed.
    #[inline]
    fn raw(text: &'a [char]) -> Self {
        let leading_spaces = text.iter().take_while(|&&c| c == ' ').count();

        let trailing_spaces =
            text.iter().rev().take_while(|&&c| c == ' ').count();

        Self {
            leading_spaces,
            trailing_spaces,
            has_uppercase: text.iter().copied().any(char::is_uppercase),
            text,
            match_type: MatchType::Fuzzy,
            is_inverse: false,
        }
    }

    /// Parses one word of an extended query into a pattern.
    ///
    /// Operator characters are consumed in this order:
    ///
    /// 1. a leading `!` marks the pattern as inverse and selects
    ///    [`MatchType::Exact`];
    /// 2. a trailing `$` selects [`MatchType::SuffixExact`], but only while
    ///    more than one character is left, so a lone `$` stays literal;
    /// 3. a leading `'` selects [`MatchType::Exact`], or
    ///    [`MatchType::Fuzzy`] if the pattern is inverse; otherwise a leading
    ///    `^` selects [`MatchType::PrefixExact`], or
    ///    [`MatchType::EqualExact`] if step 2 already matched.
    ///
    /// Step 3 overrides the match type chosen by step 2 when it sees `'`, so
    /// `'foo$` yields the text `foo` matched as `Exact`.
    ///
    /// Returns `None` when no text is left after removing the operators.
    /// `text` must not be empty (checked with a debug assertion).
    #[inline]
    pub(super) fn parse(mut text: &'a [char]) -> Option<Self> {
        debug_assert!(!text.is_empty());

        let mut is_inverse = false;

        let mut match_type = MatchType::Fuzzy;

        if starts_with(text, '!') {
            is_inverse = true;
            match_type = MatchType::Exact;
            text = &text[1..];
        }

        if ends_with(text, '$') && text.len() > 1 {
            match_type = MatchType::SuffixExact;
            text = &text[..text.len() - 1];
        }

        if starts_with(text, '\'') {
            match_type =
                if !is_inverse { MatchType::Exact } else { MatchType::Fuzzy };

            text = &text[1..];
        } else if starts_with(text, '^') {
            match_type = if match_type == MatchType::SuffixExact {
                MatchType::EqualExact
            } else {
                MatchType::PrefixExact
            };

            text = &text[1..];
        }

        if text.is_empty() {
            return None;
        }

        let has_uppercase = text.iter().copied().any(char::is_uppercase);

        let leading_spaces = text.iter().take_while(|&&c| c == ' ').count();

        let trailing_spaces =
            text.iter().rev().take_while(|&&c| c == ' ').count();

        let this = Self {
            is_inverse,
            match_type,
            text,
            has_uppercase,
            leading_spaces,
            trailing_spaces,
        };

        Some(this)
    }

    /// Returns the number of trailing `' '` characters in the text.
    #[inline(always)]
    pub(super) fn trailing_spaces(&self) -> usize {
        self.trailing_spaces
    }
}

/// Returns `true` if the last character of `haystack` is `needle`.
#[inline(always)]
fn ends_with(haystack: &[char], needle: char) -> bool {
    haystack.last().copied() == Some(needle)
}

/// Returns `true` if the first character of `haystack` is `needle`.
#[inline(always)]
fn starts_with(haystack: &[char], needle: char) -> bool {
    haystack.first().copied() == Some(needle)
}

/// How the text of a [`Pattern`] has to be matched, as selected by the
/// operator characters handled in [`Pattern::parse`].
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub(super) enum MatchType {
    /// No operator was given (or `!'` was used).
    #[default]
    Fuzzy,

    /// Selected by a leading `'`, or by a leading `!`.
    Exact,

    /// Selected by a leading `^`.
    PrefixExact,

    /// Selected by a trailing `$`.
    SuffixExact,

    /// Selected by a leading `^` combined with a trailing `$`.
    EqualExact,
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn pattern_parse_specials_1() {
        assert!(Pattern::parse(&['\'']).is_none());
        assert!(Pattern::parse(&['^']).is_none());
        assert!(Pattern::parse(&['!']).is_none());

        let pattern = Pattern::parse(&['$']).unwrap();
        assert_eq!(pattern.into_string(), "$");
        assert_eq!(pattern.match_type, MatchType::Fuzzy);
    }

    #[test]
    fn pattern_parse_specials_2() {
        assert!(Pattern::parse(&['!', '\'']).is_none());
        assert!(Pattern::parse(&['!', '^']).is_none());
        assert!(Pattern::parse(&['\'', '$']).is_none());
        assert!(Pattern::parse(&['^', '$']).is_none());

        let pattern = Pattern::parse(&['\'', '^']).unwrap();
        assert_eq!(pattern.into_string(), "^");
        assert_eq!(pattern.match_type, MatchType::Exact);

        let pattern = Pattern::parse(&['!', '$']).unwrap();
        assert_eq!(pattern.into_string(), "$");
        assert_eq!(pattern.match_type, MatchType::Exact);
        assert!(pattern.is_inverse);

        let pattern = Pattern::parse(&['!', '!']).unwrap();
        assert_eq!(pattern.into_string(), "!");
        assert_eq!(pattern.match_type, MatchType::Exact);
        assert!(pattern.is_inverse);

        let pattern = Pattern::parse(&['$', '$']).unwrap();
        assert_eq!(pattern.into_string(), "$");
        assert_eq!(pattern.match_type, MatchType::SuffixExact);
    }

    #[test]
    fn pattern_parse_specials_3() {
        assert!(Pattern::parse(&['!', '^', '$']).is_none());

        let pattern = Pattern::parse(&['\'', '^', '$']).unwrap();
        assert_eq!(pattern.into_string(), "^");
        assert_eq!(pattern.match_type, MatchType::Exact);

        let pattern = Pattern::parse(&['\'', '!', '$']).unwrap();
        assert_eq!(pattern.into_string(), "!");
        assert_eq!(pattern.match_type, MatchType::Exact);
    }

    #[test]
    fn pattern_parse_specials_4() {
        let pattern = Pattern::parse(&['\'', '^', '$', '$']).unwrap();
        assert_eq!(pattern.into_string(), "^$");
        assert_eq!(pattern.match_type, MatchType::Exact);
    }
}

--------------------------------------------------------------------------------
/src/metrics/fzf/scheme.rs:
--------------------------------------------------------------------------------
use super::{bonus, CharClass, Score};

/// A type used to tweak the distance algorithm.
///
/// This enum can be passed to both [`FzfV1`](super::FzfV1) and
/// [`FzfV2`](super::FzfV2) to tweak the distance algorithm based on the type
/// of candidates being searched.
#[derive(Debug, Default, Clone, Copy, Ord, PartialOrd, Eq, PartialEq)]
pub enum FzfScheme {
    /// A generic distance scheme that works well for any type of input.
    #[default]
    Default,

    /// A distance scheme tailored for searching file paths. It assigns
    /// additional bonus points to the character immediately following a path
    /// separator (i.e. `/` on Unix-like systems and `\` on Windows).
    Path,

    /// A distance scheme tailored for searching shell command history which
    /// doesn't assign any additional bonus points.
    History,
}

impl FzfScheme {
    /// Returns the preset [`Scheme`] ([`DEFAULT`], [`PATH`] or [`HISTORY`])
    /// that this variant stands for.
    #[inline]
    pub(super) fn into_inner(self) -> Scheme {
        match self {
            Self::Default => DEFAULT,
            Self::Path => PATH,
            Self::History => HISTORY,
        }
    }

    /// Maps a [`Scheme`] back to the variant it was created from.
    ///
    /// Only two fields are inspected. `bonus_boundary_white` separates
    /// `Default` from the other two presets, which share the same value.
    /// `initial_char_class` then separates `Path` from `History`. `None` is
    /// returned when the whitespace bonus matches neither preset value.
    #[inline]
    pub(super) fn from_inner(scheme: &Scheme) -> Option<Self> {
        if scheme.bonus_boundary_white == DEFAULT.bonus_boundary_white {
            Some(Self::Default)
        } else if scheme.bonus_boundary_white == PATH.bonus_boundary_white {
            if scheme.initial_char_class == CharClass::Delimiter {
                Some(Self::Path)
            } else {
                Some(Self::History)
            }
        } else {
            None
        }
    }
}

/// The concrete parameters behind an [`FzfScheme`].
#[doc(hidden)]
#[derive(Clone)]
pub struct Scheme {
    /// Bonus used by `compute_bonus` at a whitespace boundary.
    pub bonus_boundary_white: Score,

    /// Bonus used by `compute_bonus` for a character that follows a
    /// delimiter.
    pub bonus_boundary_delimiter: Score,

    /// NOTE(review): presumably the class assumed for the position before
    /// the first character of a candidate. Confirm at the use site.
    pub(super) initial_char_class: CharClass,

    /// Predicate used by `char_class` to recognise delimiter characters.
    pub(super) is_delimiter: fn(char) -> bool,
}

impl Default for Scheme {
    #[inline]
    fn default() -> Self {
        DEFAULT
    }
}

/// Parameters of [`FzfScheme::Default`].
pub const DEFAULT: Scheme = Scheme {
    bonus_boundary_white: bonus::BOUNDARY + 2,
    bonus_boundary_delimiter: bonus::BOUNDARY + 1,
    initial_char_class: CharClass::WhiteSpace,
    is_delimiter: is_delimiter_default,
};

/// Delimiters of the default scheme: `/`, `,`, `:`, `;` and `|`.
#[inline]
fn is_delimiter_default(ch: char) -> bool {
    matches!(ch, '/' | ',' | ':' | ';' | '|')
}

/// Parameters of [`FzfScheme::Path`].
pub const PATH: Scheme = Scheme {
    bonus_boundary_white: bonus::BOUNDARY,
    bonus_boundary_delimiter: bonus::BOUNDARY + 1,
    initial_char_class: CharClass::Delimiter,
    is_delimiter: is_delimiter_path,
};

/// Delimiters of the path scheme: `/` everywhere, plus `\` when compiled for
/// Windows.
#[inline]
fn is_delimiter_path(ch: char) -> bool {
    // Using `std::path::MAIN_SEPARATOR` would force us to depend on `std`
    // instead of `core + alloc`, so we use a custom implementation.
    #[cfg(windows)]
    let os_path_separator = '\\';
    #[cfg(not(windows))]
    let os_path_separator = '/';

    ch == '/' || ch == os_path_separator
}

/// Parameters of [`FzfScheme::History`]: both boundary bonuses are the plain
/// [`bonus::BOUNDARY`], everything else is taken from [`DEFAULT`].
pub const HISTORY: Scheme = Scheme {
    bonus_boundary_white: bonus::BOUNDARY,
    bonus_boundary_delimiter: bonus::BOUNDARY,
    initial_char_class: DEFAULT.initial_char_class,
    is_delimiter: DEFAULT.is_delimiter,
};

--------------------------------------------------------------------------------
/src/metrics/fzf/scoring.rs:
--------------------------------------------------------------------------------
use super::*;

/// The class a character falls into for the purpose of computing bonuses.
#[derive(Clone, Copy, PartialEq, Eq)]
pub(super) enum CharClass {
    /// A whitespace character.
    WhiteSpace,

    /// Any character that fits none of the other classes.
    NonWord,

    /// A character accepted by the scheme's `is_delimiter` predicate.
    Delimiter,

    /// A lowercase letter.
    Lower,

    /// An uppercase letter.
    Upper,

    /// A non-ASCII alphabetic character that is neither lowercase nor
    /// uppercase.
    Letter,

    /// An ASCII digit, or a non-ASCII numeric character.
    Number,
}

/// Classifies `ch`, taking the ASCII-only path when possible.
#[inline]
pub(super) fn char_class(ch: char, scheme: &Scheme) -> CharClass {
    if ch.is_ascii() {
        ascii_char_class(ch, scheme)
    } else {
        non_ascii_char_class(ch, scheme)
    }
}

/// Classifies an ASCII character. The checks run in order: lowercase,
/// uppercase, digit, whitespace, delimiter. Anything else is `NonWord`.
#[inline]
fn ascii_char_class(ch: char, scheme: &Scheme) -> CharClass {
    if ch.is_ascii_lowercase() {
        CharClass::Lower
    } else if ch.is_ascii_uppercase() {
        CharClass::Upper
    } else if ch.is_ascii_digit() {
        CharClass::Number
    } else if ch.is_ascii_whitespace() {
        CharClass::WhiteSpace
    } else if (scheme.is_delimiter)(ch) {
        CharClass::Delimiter
    } else {
        CharClass::NonWord
    }
}

/// Classifies a non-ASCII character. The checks run in order: lowercase,
/// uppercase, numeric, alphabetic, whitespace, delimiter. Anything else is
/// `NonWord`.
#[inline]
fn non_ascii_char_class(ch: char, scheme: &Scheme) -> CharClass {
    if ch.is_lowercase() {
        CharClass::Lower
    } else if ch.is_uppercase() {
        CharClass::Upper
    } else if ch.is_numeric() {
        CharClass::Number
    } else if ch.is_alphabetic() {
        CharClass::Letter
    } else if ch.is_whitespace() {
        CharClass::WhiteSpace
    } else if (scheme.is_delimiter)(ch) {
        CharClass::Delimiter
    } else {
        CharClass::NonWord
    }
}

/// Returns the bonus for a character of class `next_class` that comes right
/// after a character of class `prev_class`.
///
/// The class of the character itself is looked at first: non-word and
/// whitespace characters get a fixed bonus, an uppercase letter after a
/// lowercase one and a number after a non-number get [`bonus::CAMEL_123`].
/// Only then does the previous class matter: following whitespace, a
/// delimiter or a non-word character yields the respective boundary bonus,
/// and anything else yields `0`.
///
/// NOTE(review): upstream fzf's `bonusFor` checks the previous class first
/// for every class above non-word, and gives delimiters the non-word bonus
/// when no boundary applies. The two orderings differ for, e.g., a digit
/// that follows whitespace. Confirm the ordering used here is intended.
#[inline]
pub(super) fn compute_bonus(
    prev_class: CharClass,
    next_class: CharClass,
    scheme: &Scheme,
) -> Score {
    use CharClass::*;

    match next_class {
        NonWord => bonus::NON_WORD,

        WhiteSpace => scheme.bonus_boundary_white,

        Upper if prev_class == Lower => bonus::CAMEL_123,

        Number if prev_class != Number => bonus::CAMEL_123,

        _ => {
            if prev_class == WhiteSpace {
                scheme.bonus_boundary_white
            } else if prev_class == Delimiter {
                scheme.bonus_boundary_delimiter
            } else if prev_class == NonWord {
                bonus::BOUNDARY
            } else {
                0
            }
        },
    }
}

--------------------------------------------------------------------------------
/src/metrics/fzf/slab.rs:
--------------------------------------------------------------------------------
use core::ops::{Index, IndexMut};

use super::Score;

/// Creating a new [`V2Slab`] allocates 5.25kb on a 64-bit system and 4.25kb on
/// a 32-bit system.
7 | #[derive(Clone, Default)] 8 | pub(super) struct V2Slab { 9 | /// TODO: docs 10 | pub(super) bonus: BonusSlab, 11 | 12 | /// TODO: docs 13 | pub(super) consecutive_matrix: MatrixSlab, 14 | 15 | /// TODO: docs 16 | pub(super) matched_indices: MatchedIndicesSlab, 17 | 18 | /// TODO: docs 19 | pub(super) scoring_matrix: MatrixSlab, 20 | } 21 | 22 | /// TODO: docs 23 | #[derive(Clone, Default)] 24 | pub(super) struct Bonus { 25 | value: u8, 26 | is_set: bool, 27 | } 28 | 29 | impl Bonus { 30 | #[inline(always)] 31 | pub fn is_set(&self) -> bool { 32 | self.is_set 33 | } 34 | 35 | #[inline(always)] 36 | pub fn set(&mut self, value: Score) { 37 | self.value = value as _; 38 | self.is_set = true; 39 | } 40 | 41 | #[inline(always)] 42 | pub fn value(&self) -> Score { 43 | self.value as _ 44 | } 45 | } 46 | 47 | /// Creating a new [`BonusSlab`] allocates 256 bytes. 48 | #[derive(Clone)] 49 | pub(super) struct BonusSlab { 50 | vec: Vec, 51 | } 52 | 53 | impl Default for BonusSlab { 54 | #[inline(always)] 55 | fn default() -> Self { 56 | Self { vec: vec![Bonus::default(); 128] } 57 | } 58 | } 59 | 60 | impl BonusSlab { 61 | /// TODO: docs 62 | #[inline] 63 | pub fn alloc(&mut self, len: usize) -> &mut [Bonus] { 64 | if len > self.vec.len() { 65 | self.vec.resize(len, Bonus::default()); 66 | } 67 | 68 | let slice = &mut self.vec[..len]; 69 | 70 | for bonus in slice.iter_mut() { 71 | bonus.is_set = false; 72 | } 73 | 74 | slice 75 | } 76 | } 77 | 78 | /// Creating a new [`CandidateSlab`] allocates 512 bytes. 
79 | #[derive(Clone)] 80 | pub(super) struct CandidateSlab { 81 | chars: Vec, 82 | } 83 | 84 | impl Default for CandidateSlab { 85 | #[inline(always)] 86 | fn default() -> Self { 87 | Self { chars: vec![char::default(); 128] } 88 | } 89 | } 90 | 91 | impl CandidateSlab { 92 | #[inline(always)] 93 | pub fn alloc<'a>(&'a mut self, text: &str) -> &'a [char] { 94 | if text.len() > self.chars.len() { 95 | self.chars.resize(text.len(), char::default()); 96 | } 97 | 98 | let mut char_len = 0; 99 | 100 | for ch in text.chars() { 101 | self.chars[char_len] = ch; 102 | char_len += 1; 103 | } 104 | 105 | &self.chars[..char_len] 106 | } 107 | } 108 | 109 | /// Creating a new [`MatchedIndicesSlab`] allocates 1kb on a 64-bit system. 110 | #[derive(Clone)] 111 | pub(super) struct MatchedIndicesSlab { 112 | vec: Vec, 113 | } 114 | 115 | impl Default for MatchedIndicesSlab { 116 | #[inline] 117 | fn default() -> Self { 118 | Self { vec: vec![0; 128] } 119 | } 120 | } 121 | 122 | impl MatchedIndicesSlab { 123 | #[inline] 124 | /// TODO: docs 125 | pub fn alloc(&mut self, len: usize) -> &mut [usize] { 126 | if len > self.vec.len() { 127 | self.vec.resize(len, 0); 128 | } 129 | 130 | &mut self.vec[..len] 131 | } 132 | } 133 | 134 | pub(super) trait MatrixItem: Copy + Ord + core::fmt::Display { 135 | /// TODO: docs 136 | fn fill() -> Self; 137 | 138 | /// TODO: docs 139 | fn printed_width(&self) -> usize; 140 | } 141 | 142 | impl MatrixItem for Score { 143 | #[inline] 144 | fn fill() -> Self { 145 | 0 146 | } 147 | 148 | fn printed_width(&self) -> usize { 149 | if *self == 0 { 150 | 1 151 | } else { 152 | (self.ilog10() + 1) as usize 153 | } 154 | } 155 | } 156 | 157 | impl MatrixItem for usize { 158 | #[inline] 159 | fn fill() -> Self { 160 | 0 161 | } 162 | 163 | fn printed_width(&self) -> usize { 164 | if *self == 0 { 165 | 1 166 | } else { 167 | (self.ilog10() + 1) as usize 168 | } 169 | } 170 | } 171 | 172 | /// Creating a new [`MatrixSlab`] allocates `256 * size_of::()` bytes. 
173 | #[derive(Clone)] 174 | pub(super) struct MatrixSlab { 175 | vec: Vec, 176 | } 177 | 178 | impl Default for MatrixSlab { 179 | #[inline] 180 | fn default() -> MatrixSlab { 181 | // We allocate a 256 cell matrix slab by default to minimize the 182 | // need to re-allocate for long `query * candidate` pairs. 183 | Self { vec: vec![T::default(); 256] } 184 | } 185 | } 186 | 187 | impl MatrixSlab { 188 | /// TODO: docs 189 | #[inline] 190 | pub fn alloc(&mut self, width: usize, height: usize) -> Matrix<'_, T> { 191 | debug_assert!(height * width > 0); 192 | 193 | if height * width > self.vec.len() { 194 | self.vec.resize(height * width, T::fill()); 195 | } 196 | 197 | let slice = &mut self.vec[..height * width]; 198 | 199 | slice.fill(T::fill()); 200 | 201 | Matrix { slice, height, width } 202 | } 203 | } 204 | 205 | /// TODO: docs 206 | pub(super) struct Matrix<'a, T: MatrixItem> { 207 | /// TODO: docs 208 | /// 209 | /// ... 210 | /// \---- height times ----/ 211 | slice: &'a mut [T], 212 | height: usize, 213 | width: usize, 214 | } 215 | 216 | /// Prints the matrix like this: 217 | /// 218 | /// ```text 219 | /// ┌ ┐ 220 | /// │0 16 16 13 12 11 10 9 8│ 221 | /// │0 0 0 0 0 0 0 0 0│ 222 | /// │0 0 0 0 0 0 0 0 0│ 223 | /// └ ┘ 224 | /// ``` 225 | impl core::fmt::Debug for Matrix<'_, T> { 226 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 227 | use core::fmt::Write; 228 | 229 | // The matrix should never be empty, but just in case. 230 | if self.slice.is_empty() { 231 | return f.write_str("[ ]"); 232 | } 233 | 234 | // The character width of the biggest score in the whole matrix. 235 | let max_score_width = { 236 | let max_score = self.slice.iter().copied().max().unwrap(); 237 | max_score.printed_width() 238 | }; 239 | 240 | // The character width of the biggest score in the last column. 241 | let last_col_max_score_width = { 242 | // The cell in the last column of the first row. 
243 | let first_row_last_col = 244 | self.cols(self.top_left()).last().unwrap(); 245 | 246 | let last_col_max_score = self 247 | .rows(first_row_last_col) 248 | .map(|cell| self[cell]) 249 | .max() 250 | .unwrap(); 251 | 252 | last_col_max_score.printed_width() 253 | }; 254 | 255 | let printed_matrix_inner_width = (self.width - 1) 256 | * (max_score_width + 1) 257 | + last_col_max_score_width; 258 | 259 | let opening_char: char; 260 | 261 | let closing_char: char; 262 | 263 | if self.height == 1 { 264 | opening_char = '['; 265 | closing_char = ']'; 266 | } else { 267 | f.write_char('┌')?; 268 | f.write_str(&" ".repeat(printed_matrix_inner_width))?; 269 | f.write_char('┐')?; 270 | f.write_char('\n')?; 271 | opening_char = '│'; 272 | closing_char = '│'; 273 | } 274 | 275 | for cell in self.rows(self.top_left()) { 276 | f.write_char(opening_char)?; 277 | 278 | for cell in self.cols(cell) { 279 | let item = self[cell]; 280 | 281 | write!(f, "{item}")?; 282 | 283 | let num_spaces = if self.col_of(cell) + 1 == self.width { 284 | last_col_max_score_width - item.printed_width() 285 | } else { 286 | max_score_width - item.printed_width() + 1 287 | }; 288 | 289 | f.write_str(&" ".repeat(num_spaces))?; 290 | } 291 | 292 | f.write_char(closing_char)?; 293 | 294 | f.write_char('\n')?; 295 | } 296 | 297 | if self.height > 1 { 298 | f.write_char('└')?; 299 | f.write_str(&" ".repeat(printed_matrix_inner_width))?; 300 | f.write_char('┘')?; 301 | } 302 | 303 | Ok(()) 304 | } 305 | } 306 | 307 | impl<'a, T: MatrixItem + Copy> Matrix<'a, T> { 308 | /// TODO: docs 309 | #[inline(always)] 310 | pub fn get_value(&self, cell: MatrixCell) -> Option { 311 | self.slice.get(cell.0).copied() 312 | } 313 | } 314 | 315 | impl<'a, T: MatrixItem> Matrix<'a, T> { 316 | /// TODO: docs 317 | #[inline] 318 | pub fn col_of(&self, cell: MatrixCell) -> usize { 319 | cell.0 % self.width 320 | } 321 | 322 | /// TODO: docs 323 | #[inline] 324 | pub fn cols(&self, starting_from: MatrixCell) -> Cols { 325 | 
Cols { 326 | next: starting_from, 327 | remaining: self.width - self.col_of(starting_from), 328 | } 329 | } 330 | 331 | /// TODO: docs 332 | #[inline] 333 | pub fn down_right(&self, cell: MatrixCell) -> MatrixCell { 334 | MatrixCell(cell.0 + self.width + 1) 335 | } 336 | 337 | /// TODO: docs 338 | #[inline(always)] 339 | pub fn height(&self) -> usize { 340 | self.height 341 | } 342 | 343 | /// TODO: docs 344 | #[inline] 345 | pub fn left(&self, cell: MatrixCell) -> MatrixCell { 346 | MatrixCell(cell.0 - 1) 347 | } 348 | 349 | /// TODO: docs 350 | #[inline] 351 | pub fn row_of(&self, cell: MatrixCell) -> usize { 352 | cell.0 / self.width 353 | } 354 | 355 | /// TODO: docs 356 | #[inline] 357 | pub fn row_mut(&mut self, row: usize) -> &mut [T] { 358 | let start = row * self.width; 359 | let end = start + self.width; 360 | &mut self.slice[start..end] 361 | } 362 | 363 | #[inline] 364 | pub fn rows(&self, starting_from: MatrixCell) -> Rows { 365 | Rows { 366 | next: starting_from, 367 | matrix_width: self.width, 368 | remaining: self.height - self.row_of(starting_from), 369 | } 370 | } 371 | 372 | /// TODO: docs 373 | #[inline] 374 | pub fn top_left(&self) -> MatrixCell { 375 | MatrixCell(0) 376 | } 377 | 378 | /// TODO: docs 379 | #[inline] 380 | pub fn two_rows_mut( 381 | &mut self, 382 | row_idx_a: usize, 383 | row_idx_b: usize, 384 | ) -> (&mut Row, &mut Row) { 385 | debug_assert!(row_idx_a < row_idx_b); 386 | 387 | let start_b = row_idx_b * self.width; 388 | 389 | let (part_a, part_b) = self.slice.split_at_mut(start_b); 390 | 391 | let start_a = row_idx_a * self.width; 392 | 393 | (&mut part_a[start_a..start_a + self.width], &mut part_b[..self.width]) 394 | } 395 | 396 | #[inline] 397 | pub fn up_left(&self, cell: MatrixCell) -> MatrixCell { 398 | MatrixCell(cell.0 - self.width - 1) 399 | } 400 | 401 | /// TODO: docs 402 | #[inline(always)] 403 | pub fn width(&self) -> usize { 404 | self.width 405 | } 406 | } 407 | 408 | pub(super) type Row = [T]; 409 | 410 | 
#[derive(Debug, Clone, Copy)] 411 | pub(super) struct MatrixCell(pub(super) usize); 412 | 413 | impl Index for Matrix<'_, T> { 414 | type Output = T; 415 | 416 | #[inline] 417 | fn index(&self, index: MatrixCell) -> &Self::Output { 418 | &self.slice[index.0] 419 | } 420 | } 421 | 422 | impl IndexMut for Matrix<'_, T> { 423 | #[inline] 424 | fn index_mut(&mut self, index: MatrixCell) -> &mut Self::Output { 425 | &mut self.slice[index.0] 426 | } 427 | } 428 | 429 | /// TODO: docs 430 | pub(super) struct Cols { 431 | next: MatrixCell, 432 | remaining: usize, 433 | } 434 | 435 | impl Iterator for Cols { 436 | type Item = MatrixCell; 437 | 438 | #[inline] 439 | fn next(&mut self) -> Option { 440 | if self.remaining == 0 { 441 | return None; 442 | } 443 | let this = self.next; 444 | self.next.0 += 1; 445 | self.remaining -= 1; 446 | Some(this) 447 | } 448 | } 449 | 450 | /// TODO: docs 451 | pub(super) struct Rows { 452 | next: MatrixCell, 453 | matrix_width: usize, 454 | remaining: usize, 455 | } 456 | 457 | impl Iterator for Rows { 458 | type Item = MatrixCell; 459 | 460 | #[inline] 461 | fn next(&mut self) -> Option { 462 | if self.remaining == 0 { 463 | return None; 464 | } 465 | let this = self.next; 466 | self.next.0 += self.matrix_width; 467 | self.remaining -= 1; 468 | Some(this) 469 | } 470 | } 471 | -------------------------------------------------------------------------------- /src/metrics/mod.rs: -------------------------------------------------------------------------------- 1 | #[cfg(any(feature = "fzf-v1", feature = "fzf-v2"))] 2 | #[cfg_attr(docsrs, doc(cfg(any(feature = "fzf-v1", feature = "fzf-v2"))))] 3 | pub mod fzf; 4 | -------------------------------------------------------------------------------- /src/utils.rs: -------------------------------------------------------------------------------- 1 | use crate::normalize::{is_normalized, normalize}; 2 | 3 | /// TODO: docs 4 | pub(crate) type CharEq = fn(char, char) -> bool; 5 | 6 | /// TODO: docs 7 | 
const ASCII_CASE_MASK: u8 = 0b0010_0000; 8 | 9 | /// TODO: docs 10 | #[inline(always)] 11 | pub fn ascii_letter_flip_case(ascii_letter: u8) -> u8 { 12 | debug_assert!(ascii_letter.is_ascii_alphabetic()); 13 | ascii_letter ^ ASCII_CASE_MASK 14 | } 15 | 16 | #[inline(always)] 17 | pub fn case_insensitive_eq(lhs: char, rhs: char) -> bool { 18 | lhs.eq_ignore_ascii_case(&rhs) 19 | } 20 | 21 | #[inline(always)] 22 | pub fn case_insensitive_normalized_eq(lhs: char, rhs: char) -> bool { 23 | lhs.eq_ignore_ascii_case(&normalize_candidate_char(lhs, rhs)) 24 | } 25 | 26 | #[inline(always)] 27 | pub fn case_sensitive_eq(lhs: char, rhs: char) -> bool { 28 | lhs == rhs 29 | } 30 | 31 | #[inline(always)] 32 | pub fn case_sensitive_normalized_eq(lhs: char, rhs: char) -> bool { 33 | lhs == normalize_candidate_char(lhs, rhs) 34 | } 35 | 36 | #[inline(always)] 37 | pub fn char_eq(is_case_sensitive: bool, normalize_candidate: bool) -> CharEq { 38 | match (is_case_sensitive, normalize_candidate) { 39 | (false, false) => case_insensitive_eq, 40 | (true, false) => case_sensitive_eq, 41 | (false, true) => case_insensitive_normalized_eq, 42 | (true, true) => case_sensitive_normalized_eq, 43 | } 44 | } 45 | 46 | /// TODO: docs 47 | #[inline(always)] 48 | fn leading_spaces(s: &str) -> usize { 49 | s.bytes().take_while(|&b| b == b' ').count() 50 | } 51 | 52 | /// TODO: docs 53 | #[inline(always)] 54 | fn normalize_candidate_char(query_char: char, candidate_char: char) -> char { 55 | if is_normalized(query_char) { 56 | normalize(candidate_char) 57 | } else { 58 | candidate_char 59 | } 60 | } 61 | 62 | /// TODO: docs 63 | #[inline(always)] 64 | pub fn strip_leading_spaces(s: &str) -> &str { 65 | &s[leading_spaces(s)..] 
66 | } 67 | -------------------------------------------------------------------------------- /tests/fzf_common.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::single_range_in_vec_init)] 2 | 3 | use norm::fzf::{bonus, penalty}; 4 | use norm::CaseSensitivity; 5 | use CaseSensitivity::*; 6 | 7 | pub fn upstream_empty() { 8 | let (_, m) = fzf::(Insensitive, "", "foo"); 9 | 10 | let m = m.unwrap(); 11 | 12 | assert_eq!(m.distance.into_score(), 0); 13 | 14 | assert!(m.matched_ranges.is_empty()); 15 | } 16 | 17 | pub fn upstream_fuzzy_1() { 18 | let (_, m) = fzf::(Insensitive, "oBZ", "fooBarbaz1"); 19 | 20 | let m = m.unwrap(); 21 | 22 | assert_eq!( 23 | m.distance.into_score(), 24 | 3 * bonus::MATCH + bonus::CAMEL_123 25 | - penalty::GAP_START 26 | - 3 * penalty::GAP_EXTENSION 27 | ); 28 | 29 | assert_eq!(m.matched_ranges, [2..4, 8..9]); 30 | } 31 | 32 | pub fn upstream_fuzzy_2() { 33 | let (fzf, m) = fzf::(Insensitive, "fbb", "foo bar baz"); 34 | 35 | let m = m.unwrap(); 36 | 37 | assert_eq!( 38 | m.distance.into_score(), 39 | 3 * bonus::MATCH 40 | + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 2) 41 | * fzf.scheme().bonus_boundary_white 42 | - 2 * penalty::GAP_START 43 | - 4 * penalty::GAP_EXTENSION 44 | ); 45 | 46 | assert_eq!(m.matched_ranges, [0..1, 4..5, 8..9]); 47 | } 48 | 49 | pub fn upstream_fuzzy_3() { 50 | let (_, m) = fzf::(Insensitive, "rdoc", "/AutomatorDocument.icns"); 51 | 52 | let m = m.unwrap(); 53 | 54 | assert_eq!( 55 | m.distance.into_score(), 56 | 4 * bonus::MATCH + 2 * bonus::CONSECUTIVE + bonus::CAMEL_123 57 | ); 58 | 59 | assert_eq!(m.matched_ranges, [9..13]); 60 | } 61 | 62 | pub fn upstream_fuzzy_4() { 63 | let (fzf, m) = fzf::(Insensitive, "zshc", "/man1/zshcompctl.1"); 64 | 65 | let m = m.unwrap(); 66 | 67 | assert_eq!( 68 | m.distance.into_score(), 69 | 4 * bonus::MATCH 70 | + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 3) 71 | * fzf.scheme().bonus_boundary_delimiter 72 | ); 73 | 74 | 
assert_eq!(m.matched_ranges, [6..10]); 75 | } 76 | 77 | pub fn upstream_fuzzy_5() { 78 | let (fzf, m) = fzf::(Insensitive, "zshc", "/.oh-my-zsh/cache"); 79 | 80 | let m = m.unwrap(); 81 | 82 | assert_eq!( 83 | m.distance.into_score(), 84 | 4 * bonus::MATCH 85 | + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 2) * bonus::BOUNDARY 86 | + fzf.scheme().bonus_boundary_delimiter 87 | - penalty::GAP_START 88 | ); 89 | 90 | assert_eq!(m.matched_ranges, [8..11, 12..13]); 91 | } 92 | 93 | pub fn upstream_fuzzy_6() { 94 | let (_, m) = fzf::(Insensitive, "12356", "ab0123 456"); 95 | 96 | let m = m.unwrap(); 97 | 98 | assert_eq!( 99 | m.distance.into_score(), 100 | 5 * bonus::MATCH + 3 * bonus::CONSECUTIVE 101 | - penalty::GAP_START 102 | - penalty::GAP_EXTENSION 103 | ); 104 | 105 | assert_eq!(m.matched_ranges, [3..6, 8..10]); 106 | } 107 | 108 | pub fn upstream_fuzzy_7() { 109 | let (_, m) = fzf::(Insensitive, "12356", "abc123 456"); 110 | 111 | let m = m.unwrap(); 112 | 113 | assert_eq!( 114 | m.distance.into_score(), 115 | 5 * bonus::MATCH 116 | + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 2) * bonus::CAMEL_123 117 | + bonus::CONSECUTIVE 118 | - penalty::GAP_START 119 | - penalty::GAP_EXTENSION 120 | ); 121 | 122 | assert_eq!(m.matched_ranges, [3..6, 8..10]); 123 | } 124 | 125 | pub fn upstream_fuzzy_8() { 126 | let (fzf, m) = fzf::(Insensitive, "fbb", "foo/bar/baz"); 127 | 128 | let m = m.unwrap(); 129 | 130 | assert_eq!( 131 | m.distance.into_score(), 132 | 3 * bonus::MATCH 133 | + bonus::FIRST_QUERY_CHAR_MULTIPLIER 134 | * fzf.scheme().bonus_boundary_white 135 | + 2 * fzf.scheme().bonus_boundary_delimiter 136 | - 2 * penalty::GAP_START 137 | - 4 * penalty::GAP_EXTENSION 138 | ); 139 | 140 | assert_eq!(m.matched_ranges, [0..1, 4..5, 8..9]); 141 | } 142 | 143 | pub fn upstream_fuzzy_9() { 144 | let (fzf, m) = fzf::(Insensitive, "fbb", "fooBarBaz"); 145 | 146 | let m = m.unwrap(); 147 | 148 | assert_eq!( 149 | m.distance.into_score(), 150 | 3 * bonus::MATCH 151 | + 
bonus::FIRST_QUERY_CHAR_MULTIPLIER 152 | * fzf.scheme().bonus_boundary_white 153 | + 2 * bonus::CAMEL_123 154 | - 2 * penalty::GAP_START 155 | - 2 * penalty::GAP_EXTENSION 156 | ); 157 | 158 | assert_eq!(m.matched_ranges, [0..1, 3..4, 6..7]); 159 | } 160 | 161 | pub fn upstream_fuzzy_10() { 162 | let (fzf, m) = fzf::(Insensitive, "fbb", "foo barbaz"); 163 | 164 | let m = m.unwrap(); 165 | 166 | assert_eq!( 167 | m.distance.into_score(), 168 | 3 * bonus::MATCH 169 | + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 1) 170 | * fzf.scheme().bonus_boundary_white 171 | - 2 * penalty::GAP_START 172 | - 3 * penalty::GAP_EXTENSION 173 | ); 174 | 175 | assert_eq!(m.matched_ranges, [0..1, 4..5, 7..8]); 176 | } 177 | 178 | pub fn upstream_fuzzy_11() { 179 | let (fzf, m) = fzf::(Insensitive, "foob", "fooBar Baz"); 180 | 181 | let m = m.unwrap(); 182 | 183 | assert_eq!( 184 | m.distance.into_score(), 185 | 4 * bonus::MATCH 186 | + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 3) 187 | * fzf.scheme().bonus_boundary_white 188 | ); 189 | 190 | assert_eq!(m.matched_ranges, [0..4]); 191 | } 192 | 193 | pub fn upstream_fuzzy_12() { 194 | let (_, m) = fzf::(Insensitive, "foo-b", "xFoo-Bar Baz"); 195 | 196 | let m = m.unwrap(); 197 | 198 | assert_eq!( 199 | m.distance.into_score(), 200 | 5 * bonus::MATCH 201 | + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 2) * bonus::CAMEL_123 202 | + bonus::NON_WORD 203 | + bonus::BOUNDARY 204 | ); 205 | 206 | assert_eq!(m.matched_ranges, [1..6]); 207 | } 208 | 209 | pub fn upstream_fuzzy_13() { 210 | let (_, m) = fzf::(Sensitive, "oBz", "fooBarbaz"); 211 | 212 | let m = m.unwrap(); 213 | 214 | assert_eq!( 215 | m.distance.into_score(), 216 | 3 * bonus::MATCH + bonus::CAMEL_123 217 | - penalty::GAP_START 218 | - 3 * penalty::GAP_EXTENSION 219 | ); 220 | 221 | assert_eq!(m.matched_ranges, [2..4, 8..9]); 222 | } 223 | 224 | pub fn upstream_fuzzy_14() { 225 | let (fzf, m) = fzf::(Sensitive, "FBB", "Foo/Bar/Baz"); 226 | 227 | let m = m.unwrap(); 228 | 229 | assert_eq!( 230 | 
m.distance.into_score(), 231 | 3 * bonus::MATCH 232 | + bonus::FIRST_QUERY_CHAR_MULTIPLIER 233 | * fzf.scheme().bonus_boundary_white 234 | + 2 * fzf.scheme().bonus_boundary_delimiter 235 | - 2 * penalty::GAP_START 236 | - 4 * penalty::GAP_EXTENSION 237 | ); 238 | 239 | assert_eq!(m.matched_ranges, [0..1, 4..5, 8..9]); 240 | } 241 | 242 | pub fn upstream_fuzzy_15() { 243 | let (fzf, m) = fzf::(Sensitive, "FBB", "FooBarBaz"); 244 | 245 | let m = m.unwrap(); 246 | 247 | assert_eq!( 248 | m.distance.into_score(), 249 | 3 * bonus::MATCH 250 | + bonus::FIRST_QUERY_CHAR_MULTIPLIER 251 | * fzf.scheme().bonus_boundary_white 252 | + 2 * bonus::CAMEL_123 253 | - 2 * penalty::GAP_START 254 | - 2 * penalty::GAP_EXTENSION 255 | ); 256 | 257 | assert_eq!(m.matched_ranges, [0..1, 3..4, 6..7]); 258 | } 259 | 260 | pub fn upstream_fuzzy_16() { 261 | let (fzf, m) = fzf::(Sensitive, "FooB", "FooBar Baz"); 262 | 263 | let m = m.unwrap(); 264 | 265 | assert_eq!( 266 | m.distance.into_score(), 267 | 4 * bonus::MATCH 268 | + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 2) 269 | * fzf.scheme().bonus_boundary_white 270 | + bonus::CAMEL_123.max(fzf.scheme().bonus_boundary_white) 271 | ); 272 | 273 | assert_eq!(m.matched_ranges, [0..4]); 274 | } 275 | 276 | pub fn upstream_fuzzy_17() { 277 | let (_, m) = fzf::(Sensitive, "o-ba", "foo-bar"); 278 | 279 | let m = m.unwrap(); 280 | 281 | assert_eq!( 282 | m.distance.into_score(), 283 | 4 * bonus::MATCH + 3 * bonus::BOUNDARY 284 | ); 285 | 286 | assert_eq!(m.matched_ranges, [2..6]); 287 | } 288 | 289 | pub fn upstream_fuzzy_18() { 290 | let (_, m) = fzf::(Sensitive, "oBZ", "fooBarbaz"); 291 | assert!(m.is_none()); 292 | } 293 | 294 | pub fn upstream_fuzzy_19() { 295 | let (_, m) = fzf::(Sensitive, "fbb", "Foo Bar Baz"); 296 | assert!(m.is_none()); 297 | } 298 | 299 | pub fn upstream_fuzzy_20() { 300 | let (_, m) = fzf::(Sensitive, "fooBarbazz", "fooBarbaz"); 301 | assert!(m.is_none()); 302 | } 303 | 304 | pub fn upstream_exact_1() { 305 | let (_, m) = 
fzf::(Sensitive, "'oBA", "fooBarbaz"); 306 | assert!(m.is_none()); 307 | } 308 | 309 | pub fn upstream_exact_2() { 310 | let (_, m) = fzf::(Sensitive, "'fooBarbazz", "fooBarbaz"); 311 | assert!(m.is_none()); 312 | } 313 | 314 | pub fn upstream_exact_3() { 315 | let (_, m) = fzf::(Insensitive, "'oBA", "fooBarbaz"); 316 | 317 | let m = m.unwrap(); 318 | 319 | assert_eq!( 320 | m.distance.into_score(), 321 | 3 * bonus::MATCH + bonus::CAMEL_123 + bonus::CONSECUTIVE 322 | ); 323 | 324 | assert_eq!(m.matched_ranges, [2..5]); 325 | } 326 | 327 | pub fn upstream_exact_4() { 328 | let (_, m) = fzf::(Insensitive, "'rdoc", "/AutomatorDocument.icns"); 329 | 330 | let m = m.unwrap(); 331 | 332 | assert_eq!( 333 | m.distance.into_score(), 334 | 4 * bonus::MATCH + bonus::CAMEL_123 + 2 * bonus::CONSECUTIVE 335 | ); 336 | 337 | assert_eq!(m.matched_ranges, [9..13]); 338 | } 339 | 340 | pub fn upstream_exact_5() { 341 | let (fzf, m) = fzf::(Insensitive, "'zshc", "/man1/zshcompctl.1"); 342 | 343 | let m = m.unwrap(); 344 | 345 | assert_eq!( 346 | m.distance.into_score(), 347 | 4 * bonus::MATCH 348 | + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 3) 349 | * fzf.scheme().bonus_boundary_delimiter 350 | ); 351 | 352 | assert_eq!(m.matched_ranges, [6..10]); 353 | } 354 | 355 | pub fn upstream_exact_6() { 356 | let (fzf, m) = fzf::(Insensitive, "'zsh/c", "/.oh-my-zsh/cache"); 357 | 358 | let m = m.unwrap(); 359 | 360 | assert_eq!( 361 | m.distance.into_score(), 362 | 5 * bonus::MATCH 363 | + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 3) * bonus::BOUNDARY 364 | + fzf.scheme().bonus_boundary_delimiter 365 | ); 366 | 367 | assert_eq!(m.matched_ranges, [8..13]); 368 | } 369 | 370 | pub fn upstream_exact_7() { 371 | let (_, m) = fzf::(Insensitive, "'oo", "foobar foo"); 372 | 373 | let m = m.unwrap(); 374 | 375 | assert_eq!(m.distance.into_score(), 2 * bonus::MATCH + bonus::CONSECUTIVE); 376 | 377 | assert_eq!(m.matched_ranges, [1..3]); 378 | } 379 | 380 | pub fn upstream_prefix_1() { 381 | let (_, m) = 
fzf::(Sensitive, "^Foo", "fooBarbaz"); 382 | assert!(m.is_none()); 383 | } 384 | 385 | pub fn upstream_prefix_2() { 386 | let (_, m) = fzf::(Sensitive, "^baz", "fooBarBaz"); 387 | assert!(m.is_none()); 388 | } 389 | 390 | pub fn upstream_prefix_3() { 391 | let (fzf, m) = fzf::(Insensitive, "^Foo", "fooBarbaz"); 392 | 393 | let m = m.unwrap(); 394 | 395 | assert_eq!( 396 | m.distance.into_score(), 397 | 3 * bonus::MATCH 398 | + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 2) 399 | * fzf.scheme().bonus_boundary_white 400 | ); 401 | 402 | assert_eq!(m.matched_ranges, [0..3]); 403 | } 404 | 405 | pub fn upstream_prefix_4() { 406 | let (fzf, m) = fzf::(Insensitive, "^foo", "foOBarBaZ"); 407 | 408 | let m = m.unwrap(); 409 | 410 | assert_eq!( 411 | m.distance.into_score(), 412 | 3 * bonus::MATCH 413 | + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 2) 414 | * fzf.scheme().bonus_boundary_white 415 | ); 416 | 417 | assert_eq!(m.matched_ranges, [0..3]); 418 | } 419 | 420 | pub fn upstream_prefix_5() { 421 | let (fzf, m) = fzf::(Insensitive, "^f-o", "f-oBarbaz"); 422 | 423 | let m = m.unwrap(); 424 | 425 | assert_eq!( 426 | m.distance.into_score(), 427 | 3 * bonus::MATCH 428 | + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 2) 429 | * fzf.scheme().bonus_boundary_white 430 | ); 431 | 432 | assert_eq!(m.matched_ranges, [0..3]); 433 | } 434 | 435 | pub fn upstream_prefix_6() { 436 | let (fzf, m) = fzf::(Insensitive, "^foo", " fooBar"); 437 | 438 | let m = m.unwrap(); 439 | 440 | assert_eq!( 441 | m.distance.into_score(), 442 | 3 * bonus::MATCH 443 | + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 2) 444 | * fzf.scheme().bonus_boundary_white 445 | ); 446 | 447 | assert_eq!(m.matched_ranges, [1..4]); 448 | } 449 | 450 | pub fn upstream_prefix_7() { 451 | let (fzf, m) = fzf::(Insensitive, "\\ fo", " fooBar"); 452 | 453 | let m = m.unwrap(); 454 | 455 | assert_eq!( 456 | m.distance.into_score(), 457 | 3 * bonus::MATCH 458 | + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 2) 459 | * fzf.scheme().bonus_boundary_white 460 
| ); 461 | 462 | assert_eq!(m.matched_ranges, [0..3]); 463 | } 464 | 465 | pub fn upstream_prefix_8() { 466 | let (_, m) = fzf::(Insensitive, "^foo", " fo"); 467 | assert!(m.is_none()); 468 | } 469 | 470 | pub fn upstream_suffix_1() { 471 | let (_, m) = fzf::(Sensitive, "Baz$", "fooBarbaz"); 472 | assert!(m.is_none()); 473 | } 474 | 475 | pub fn upstream_suffix_2() { 476 | let (_, m) = fzf::(Insensitive, "Foo$", "fooBarBaz"); 477 | assert!(m.is_none()); 478 | } 479 | 480 | pub fn upstream_suffix_3() { 481 | let (_, m) = fzf::(Insensitive, "baz$", "fooBarbaz"); 482 | 483 | let m = m.unwrap(); 484 | 485 | assert_eq!( 486 | m.distance.into_score(), 487 | 3 * bonus::MATCH + 2 * bonus::CONSECUTIVE 488 | ); 489 | 490 | assert_eq!(m.matched_ranges, [6..9]); 491 | } 492 | 493 | pub fn upstream_suffix_4() { 494 | let (_, m) = fzf::(Insensitive, "baz$", "fooBarBaZ"); 495 | 496 | let m = m.unwrap(); 497 | 498 | assert_eq!( 499 | m.distance.into_score(), 500 | 3 * bonus::MATCH 501 | + (bonus::FIRST_QUERY_CHAR_MULTIPLIER + 2) * bonus::CAMEL_123 502 | ); 503 | 504 | assert_eq!(m.matched_ranges, [6..9]); 505 | } 506 | 507 | pub fn upstream_suffix_5() { 508 | let (_, m) = fzf::(Insensitive, "baz$", "fooBarbaz "); 509 | 510 | let m = m.unwrap(); 511 | 512 | assert_eq!( 513 | m.distance.into_score(), 514 | 3 * bonus::MATCH + 2 * bonus::CONSECUTIVE 515 | ); 516 | 517 | assert_eq!(m.matched_ranges, [6..9]); 518 | } 519 | 520 | pub fn upstream_suffix_6() { 521 | let (fzf, m) = fzf::(Insensitive, "baz\\ $", "fooBarbaz "); 522 | 523 | let m = m.unwrap(); 524 | 525 | assert_eq!( 526 | m.distance.into_score(), 527 | 4 * bonus::MATCH 528 | + 2 * bonus::CONSECUTIVE 529 | + fzf.scheme().bonus_boundary_white 530 | ); 531 | 532 | assert_eq!(m.matched_ranges, [6..10]); 533 | } 534 | 535 | pub use utils::*; 536 | 537 | mod utils { 538 | use core::ops::Range; 539 | 540 | pub struct FzfMatch { 541 | pub distance: FzfDistance, 542 | pub matched_ranges: Vec>, 543 | } 544 | 545 | use 
norm::fzf::{FzfDistance, FzfParser, FzfQuery, FzfV1, FzfV2}; 546 | use norm::{CaseSensitivity, Metric}; 547 | 548 | pub trait Fzf: 549 | Default 550 | + for<'a> Metric = FzfQuery<'a>, Distance = FzfDistance> 551 | { 552 | fn set_case_sensitivity( 553 | &mut self, 554 | case_sensitivity: CaseSensitivity, 555 | ) -> &mut Self; 556 | 557 | fn scheme(&self) -> &norm::fzf::Scheme; 558 | } 559 | 560 | impl Fzf for FzfV1 { 561 | fn set_case_sensitivity( 562 | &mut self, 563 | case_sensitivity: CaseSensitivity, 564 | ) -> &mut Self { 565 | self.set_case_sensitivity(case_sensitivity) 566 | } 567 | 568 | fn scheme(&self) -> &norm::fzf::Scheme { 569 | #[cfg(feature = "__tests")] 570 | { 571 | self.scheme() 572 | } 573 | 574 | #[cfg(not(feature = "__tests"))] 575 | { 576 | unreachable!() 577 | } 578 | } 579 | } 580 | 581 | impl Fzf for FzfV2 { 582 | fn set_case_sensitivity( 583 | &mut self, 584 | case_sensitivity: CaseSensitivity, 585 | ) -> &mut Self { 586 | self.set_case_sensitivity(case_sensitivity) 587 | } 588 | 589 | fn scheme(&self) -> &norm::fzf::Scheme { 590 | #[cfg(feature = "__tests")] 591 | { 592 | self.scheme() 593 | } 594 | 595 | #[cfg(not(feature = "__tests"))] 596 | { 597 | unreachable!() 598 | } 599 | } 600 | } 601 | 602 | pub(super) fn fzf( 603 | case_sensitivity: CaseSensitivity, 604 | query: &str, 605 | candidate: &str, 606 | ) -> (F, Option) { 607 | let mut fzf = F::default(); 608 | 609 | fzf.set_case_sensitivity(case_sensitivity); 610 | 611 | let mut parser = FzfParser::new(); 612 | 613 | let mut ranges = Vec::new(); 614 | 615 | let Some(distance) = fzf.distance_and_ranges( 616 | parser.parse(query), 617 | candidate, 618 | &mut ranges, 619 | ) else { 620 | return (fzf, None); 621 | }; 622 | 623 | (fzf, Some(FzfMatch { distance, matched_ranges: ranges })) 624 | } 625 | } 626 | -------------------------------------------------------------------------------- /tests/fzf_v1.rs: -------------------------------------------------------------------------------- 1 | 
#![allow(clippy::single_range_in_vec_init)] 2 | 3 | mod fzf_common; 4 | 5 | use fzf_common as common; 6 | use norm::fzf::{FzfParser, FzfV1}; 7 | use norm::{CaseSensitivity, Metric}; 8 | 9 | #[test] 10 | fn fzf_v1_upstream_empty() { 11 | common::upstream_empty::(); 12 | } 13 | 14 | #[test] 15 | fn fzf_v1_upstream_exact_1() { 16 | common::upstream_exact_1::(); 17 | } 18 | 19 | #[test] 20 | fn fzf_v1_upstream_exact_2() { 21 | common::upstream_exact_2::(); 22 | } 23 | 24 | #[test] 25 | fn fzf_v1_upstream_exact_3() { 26 | common::upstream_exact_3::() 27 | } 28 | 29 | #[test] 30 | fn fzf_v1_upstream_exact_4() { 31 | common::upstream_exact_4::() 32 | } 33 | 34 | #[test] 35 | fn fzf_v1_upstream_exact_5() { 36 | common::upstream_exact_5::() 37 | } 38 | 39 | #[test] 40 | fn fzf_v1_upstream_exact_6() { 41 | common::upstream_exact_6::() 42 | } 43 | 44 | #[test] 45 | fn fzf_v1_upstream_exact_7() { 46 | common::upstream_exact_7::() 47 | } 48 | 49 | #[test] 50 | fn fzf_v1_upstream_fuzzy_1() { 51 | common::upstream_fuzzy_1::(); 52 | } 53 | 54 | #[test] 55 | fn fzf_v1_upstream_fuzzy_2() { 56 | common::upstream_fuzzy_2::(); 57 | } 58 | 59 | #[test] 60 | fn fzf_v1_upstream_fuzzy_3() { 61 | common::upstream_fuzzy_3::() 62 | } 63 | 64 | #[test] 65 | fn fzf_v1_upstream_fuzzy_4() { 66 | common::upstream_fuzzy_4::() 67 | } 68 | 69 | #[test] 70 | fn fzf_v1_upstream_fuzzy_5() { 71 | common::upstream_fuzzy_5::() 72 | } 73 | 74 | #[test] 75 | fn fzf_v1_upstream_fuzzy_6() { 76 | common::upstream_fuzzy_6::() 77 | } 78 | 79 | #[test] 80 | fn fzf_v1_upstream_fuzzy_7() { 81 | common::upstream_fuzzy_7::() 82 | } 83 | 84 | #[test] 85 | fn fzf_v1_upstream_fuzzy_8() { 86 | common::upstream_fuzzy_8::(); 87 | } 88 | 89 | #[test] 90 | fn fzf_v1_upstream_fuzzy_9() { 91 | common::upstream_fuzzy_9::(); 92 | } 93 | 94 | #[test] 95 | fn fzf_v1_upstream_fuzzy_10() { 96 | common::upstream_fuzzy_10::(); 97 | } 98 | 99 | #[test] 100 | fn fzf_v1_upstream_fuzzy_11() { 101 | common::upstream_fuzzy_11::(); 102 | } 
103 | 104 | #[test] 105 | fn fzf_v1_upstream_fuzzy_12() { 106 | common::upstream_fuzzy_12::(); 107 | } 108 | 109 | #[test] 110 | fn fzf_v1_upstream_fuzzy_13() { 111 | common::upstream_fuzzy_13::(); 112 | } 113 | 114 | #[test] 115 | fn fzf_v1_upstream_fuzzy_14() { 116 | common::upstream_fuzzy_14::(); 117 | } 118 | 119 | #[test] 120 | fn fzf_v1_upstream_fuzzy_15() { 121 | common::upstream_fuzzy_15::(); 122 | } 123 | 124 | #[test] 125 | fn fzf_v1_upstream_fuzzy_16() { 126 | common::upstream_fuzzy_16::(); 127 | } 128 | 129 | #[test] 130 | fn fzf_v1_upstream_fuzzy_17() { 131 | common::upstream_fuzzy_17::(); 132 | } 133 | 134 | #[test] 135 | fn fzf_v1_upstream_fuzzy_18() { 136 | common::upstream_fuzzy_18::(); 137 | } 138 | 139 | #[test] 140 | fn fzf_v1_upstream_fuzzy_19() { 141 | common::upstream_fuzzy_19::(); 142 | } 143 | 144 | #[test] 145 | fn fzf_v1_upstream_fuzzy_20() { 146 | common::upstream_fuzzy_20::(); 147 | } 148 | 149 | #[test] 150 | fn fzf_v1_upstream_prefix_1() { 151 | common::upstream_prefix_1::(); 152 | } 153 | 154 | #[test] 155 | fn fzf_v1_upstream_prefix_2() { 156 | common::upstream_prefix_2::(); 157 | } 158 | 159 | #[test] 160 | fn fzf_v1_upstream_prefix_3() { 161 | common::upstream_prefix_3::() 162 | } 163 | 164 | #[test] 165 | fn fzf_v1_upstream_prefix_4() { 166 | common::upstream_prefix_4::() 167 | } 168 | 169 | #[test] 170 | fn fzf_v1_upstream_prefix_5() { 171 | common::upstream_prefix_5::() 172 | } 173 | 174 | #[test] 175 | fn fzf_v1_upstream_prefix_6() { 176 | common::upstream_prefix_6::() 177 | } 178 | 179 | #[test] 180 | fn fzf_v1_upstream_prefix_7() { 181 | common::upstream_prefix_7::() 182 | } 183 | 184 | #[test] 185 | fn fzf_v1_upstream_prefix_8() { 186 | common::upstream_prefix_8::() 187 | } 188 | 189 | #[test] 190 | fn fzf_v1_upstream_suffix_1() { 191 | common::upstream_suffix_1::(); 192 | } 193 | 194 | #[test] 195 | fn fzf_v1_upstream_suffix_2() { 196 | common::upstream_suffix_2::(); 197 | } 198 | 199 | #[test] 200 | fn 
fzf_v1_upstream_suffix_3() { 201 | common::upstream_suffix_3::() 202 | } 203 | 204 | #[test] 205 | fn fzf_v1_upstream_suffix_4() { 206 | common::upstream_suffix_4::() 207 | } 208 | 209 | #[test] 210 | fn fzf_v1_upstream_suffix_5() { 211 | common::upstream_suffix_5::() 212 | } 213 | 214 | #[test] 215 | fn fzf_v1_upstream_suffix_6() { 216 | common::upstream_suffix_6::() 217 | } 218 | 219 | #[test] 220 | fn fzf_v1_score_1() { 221 | let mut fzf = FzfV1::new(); 222 | 223 | let mut parser = FzfParser::new(); 224 | 225 | let mut ranges = Vec::new(); 226 | 227 | let _ = fzf 228 | .set_case_sensitivity(CaseSensitivity::Sensitive) 229 | .distance_and_ranges(parser.parse("ZZ"), "ӥZZZ", &mut ranges) 230 | .unwrap(); 231 | 232 | assert_eq!(ranges, [2..4]); 233 | } 234 | 235 | #[test] 236 | fn fzf_v1_score_2() { 237 | let mut fzf = FzfV1::new(); 238 | 239 | let mut parser = FzfParser::new(); 240 | 241 | let query = parser.parse("^\\$ ]]%]]'\0\0\0\0\0\0"); 242 | 243 | let mach = fzf 244 | .set_case_sensitivity(CaseSensitivity::Sensitive) 245 | .distance(query, "\0"); 246 | 247 | assert!(mach.is_none()); 248 | } 249 | 250 | #[test] 251 | fn fzf_v1_score_3() { 252 | let mut fzf = FzfV1::new(); 253 | 254 | let mut parser = FzfParser::new(); 255 | 256 | let query = parser.parse("^\\$"); 257 | 258 | let mach = fzf 259 | .set_case_sensitivity(CaseSensitivity::Sensitive) 260 | .distance(query, " "); 261 | 262 | assert!(mach.is_none()); 263 | } 264 | 265 | #[test] 266 | fn fzf_v1_score_4() { 267 | let mut fzf = FzfV1::new(); 268 | 269 | let mut parser = FzfParser::new(); 270 | 271 | let mut ranges = Vec::new(); 272 | 273 | let query = parser.parse("z\n"); 274 | 275 | let candidate = "ZZ\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\u{65e}\nZ\u{65e}"; 276 | 277 | let _ = fzf 278 | .set_case_sensitivity(CaseSensitivity::Insensitive) 279 | .distance_and_ranges(query, candidate, &mut ranges) 280 | .unwrap(); 281 | 282 | assert_eq!(ranges, [1..2, 21..22]); 283 | } 284 | 285 | #[test] 286 | fn 
fzf_v1_score_5() { 287 | let mut fzf = FzfV1::new(); 288 | 289 | let mut parser = FzfParser::new(); 290 | 291 | let mut ranges = Vec::new(); 292 | 293 | let _ = fzf 294 | .set_case_sensitivity(CaseSensitivity::Sensitive) 295 | .set_candidate_normalization(true) 296 | .distance_and_ranges( 297 | parser.parse("e !"), 298 | " !I\\hh+\u{364}", 299 | &mut ranges, 300 | ) 301 | .unwrap(); 302 | 303 | assert_eq!(ranges, [7..9]); 304 | } 305 | 306 | #[test] 307 | fn fzf_v1_score_6() { 308 | let mut fzf = FzfV1::new(); 309 | 310 | let mut parser = FzfParser::new(); 311 | 312 | let mut ranges = Vec::new(); 313 | 314 | let query = parser.parse("^e"); 315 | 316 | let _ = fzf 317 | .set_case_sensitivity(CaseSensitivity::Insensitive) 318 | .set_candidate_normalization(true) 319 | .distance_and_ranges(query, "\u{364}", &mut ranges); 320 | 321 | assert_eq!(ranges, [0..2]); 322 | } 323 | -------------------------------------------------------------------------------- /tests/fzf_v2.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::single_range_in_vec_init)] 2 | 3 | mod fzf_common; 4 | 5 | use fzf_common as common; 6 | use norm::fzf::{bonus, FzfParser, FzfV2}; 7 | use norm::{CaseSensitivity, Metric}; 8 | 9 | #[test] 10 | fn fzf_v2_upstream_empty() { 11 | common::upstream_empty::(); 12 | } 13 | 14 | #[test] 15 | fn fzf_v2_upstream_exact_1() { 16 | common::upstream_exact_1::(); 17 | } 18 | 19 | #[test] 20 | fn fzf_v2_upstream_exact_2() { 21 | common::upstream_exact_2::(); 22 | } 23 | 24 | #[test] 25 | fn fzf_v2_upstream_exact_3() { 26 | common::upstream_exact_3::() 27 | } 28 | 29 | #[test] 30 | fn fzf_v2_upstream_exact_4() { 31 | common::upstream_exact_4::() 32 | } 33 | 34 | #[test] 35 | fn fzf_v2_upstream_exact_5() { 36 | common::upstream_exact_5::() 37 | } 38 | 39 | #[test] 40 | fn fzf_v2_upstream_exact_6() { 41 | common::upstream_exact_6::() 42 | } 43 | 44 | #[test] 45 | fn fzf_v2_upstream_exact_7() { 46 | 
common::upstream_exact_7::() 47 | } 48 | 49 | #[test] 50 | fn fzf_v2_upstream_fuzzy_1() { 51 | common::upstream_fuzzy_1::(); 52 | } 53 | 54 | #[test] 55 | fn fzf_v2_upstream_fuzzy_2() { 56 | common::upstream_fuzzy_2::(); 57 | } 58 | 59 | #[test] 60 | fn fzf_v2_upstream_fuzzy_3() { 61 | common::upstream_fuzzy_3::(); 62 | } 63 | 64 | #[test] 65 | fn fzf_v2_upstream_fuzzy_4() { 66 | common::upstream_fuzzy_4::(); 67 | } 68 | 69 | #[test] 70 | fn fzf_v2_upstream_fuzzy_5() { 71 | common::upstream_fuzzy_5::(); 72 | } 73 | 74 | #[test] 75 | fn fzf_v2_upstream_fuzzy_6() { 76 | common::upstream_fuzzy_6::(); 77 | } 78 | 79 | #[test] 80 | fn fzf_v2_upstream_fuzzy_7() { 81 | common::upstream_fuzzy_7::(); 82 | } 83 | 84 | #[test] 85 | fn fzf_v2_upstream_fuzzy_8() { 86 | common::upstream_fuzzy_8::(); 87 | } 88 | 89 | #[test] 90 | fn fzf_v2_upstream_fuzzy_9() { 91 | common::upstream_fuzzy_9::(); 92 | } 93 | 94 | #[test] 95 | fn fzf_v2_upstream_fuzzy_10() { 96 | common::upstream_fuzzy_10::(); 97 | } 98 | 99 | #[test] 100 | fn fzf_v2_upstream_fuzzy_11() { 101 | common::upstream_fuzzy_11::(); 102 | } 103 | 104 | #[test] 105 | fn fzf_v2_upstream_fuzzy_12() { 106 | common::upstream_fuzzy_12::(); 107 | } 108 | 109 | #[test] 110 | fn fzf_v2_upstream_fuzzy_13() { 111 | common::upstream_fuzzy_13::(); 112 | } 113 | 114 | #[test] 115 | fn fzf_v2_upstream_fuzzy_14() { 116 | common::upstream_fuzzy_14::(); 117 | } 118 | 119 | #[test] 120 | fn fzf_v2_upstream_fuzzy_15() { 121 | common::upstream_fuzzy_15::(); 122 | } 123 | 124 | #[test] 125 | fn fzf_v2_upstream_fuzzy_16() { 126 | common::upstream_fuzzy_16::(); 127 | } 128 | 129 | #[test] 130 | fn fzf_v2_upstream_fuzzy_17() { 131 | common::upstream_fuzzy_17::(); 132 | } 133 | 134 | #[test] 135 | fn fzf_v2_upstream_fuzzy_18() { 136 | common::upstream_fuzzy_18::(); 137 | } 138 | 139 | #[test] 140 | fn fzf_v2_upstream_fuzzy_19() { 141 | common::upstream_fuzzy_19::(); 142 | } 143 | 144 | #[test] 145 | fn fzf_v2_upstream_fuzzy_20() { 146 | 
common::upstream_fuzzy_20::(); 147 | } 148 | 149 | #[test] 150 | fn fzf_v2_upstream_prefix_1() { 151 | common::upstream_prefix_1::(); 152 | } 153 | 154 | #[test] 155 | fn fzf_v2_upstream_prefix_2() { 156 | common::upstream_prefix_2::(); 157 | } 158 | 159 | #[test] 160 | fn fzf_v2_upstream_prefix_3() { 161 | common::upstream_prefix_3::() 162 | } 163 | 164 | #[test] 165 | fn fzf_v2_upstream_prefix_4() { 166 | common::upstream_prefix_4::() 167 | } 168 | 169 | #[test] 170 | fn fzf_v2_upstream_prefix_5() { 171 | common::upstream_prefix_5::() 172 | } 173 | 174 | #[test] 175 | fn fzf_v2_upstream_prefix_6() { 176 | common::upstream_prefix_6::() 177 | } 178 | 179 | #[test] 180 | fn fzf_v2_upstream_prefix_7() { 181 | common::upstream_prefix_7::() 182 | } 183 | 184 | #[test] 185 | fn fzf_v2_upstream_prefix_8() { 186 | common::upstream_prefix_8::() 187 | } 188 | 189 | #[test] 190 | fn fzf_v2_upstream_suffix_1() { 191 | common::upstream_suffix_1::(); 192 | } 193 | 194 | #[test] 195 | fn fzf_v2_upstream_suffix_2() { 196 | common::upstream_suffix_2::(); 197 | } 198 | 199 | #[test] 200 | fn fzf_v2_upstream_suffix_3() { 201 | common::upstream_suffix_3::() 202 | } 203 | 204 | #[test] 205 | fn fzf_v2_upstream_suffix_4() { 206 | common::upstream_suffix_4::() 207 | } 208 | 209 | #[test] 210 | fn fzf_v2_upstream_suffix_5() { 211 | common::upstream_suffix_5::() 212 | } 213 | 214 | #[test] 215 | fn fzf_v2_upstream_suffix_6() { 216 | common::upstream_suffix_6::() 217 | } 218 | 219 | #[test] 220 | fn fzf_v2_score_1() { 221 | let mut fzf = FzfV2::new(); 222 | 223 | let mut parser = FzfParser::new(); 224 | 225 | let mut ranges = Vec::new(); 226 | 227 | let distance = fzf 228 | .set_case_sensitivity(CaseSensitivity::Sensitive) 229 | .distance_and_ranges(parser.parse("jelly"), "jellyfish", &mut ranges) 230 | .unwrap(); 231 | 232 | assert_eq!( 233 | distance.into_score(), 234 | bonus::MATCH * 5 235 | + fzf.scheme().bonus_boundary_white 236 | * bonus::FIRST_QUERY_CHAR_MULTIPLIER 237 | + 
fzf.scheme().bonus_boundary_white * 4 238 | ); 239 | 240 | assert_eq!(ranges, [0..5]); 241 | } 242 | 243 | #[test] 244 | fn fzf_v2_score_2() { 245 | let mut fzf = FzfV2::new(); 246 | 247 | let mut parser = FzfParser::new(); 248 | 249 | let distance = fzf 250 | .set_case_sensitivity(CaseSensitivity::Sensitive) 251 | .distance(parser.parse("!$"), "$$2"); 252 | 253 | assert!(distance.is_none()); 254 | } 255 | 256 | #[test] 257 | fn fzf_v2_score_3() { 258 | let mut fzf = FzfV2::new(); 259 | 260 | let mut parser = FzfParser::new(); 261 | 262 | let mut ranges = Vec::new(); 263 | 264 | let _ = fzf 265 | .set_case_sensitivity(CaseSensitivity::Sensitive) 266 | .distance_and_ranges( 267 | parser.parse("\0\0"), 268 | "\0#B\0\u{364}\0\0", 269 | &mut ranges, 270 | ) 271 | .unwrap(); 272 | 273 | assert_eq!(ranges, [6..8]); 274 | } 275 | 276 | #[test] 277 | fn fzf_v2_score_4() { 278 | let mut fzf = FzfV2::new(); 279 | 280 | let mut parser = FzfParser::new(); 281 | 282 | let mut ranges = Vec::new(); 283 | 284 | let _ = fzf 285 | .set_case_sensitivity(CaseSensitivity::Sensitive) 286 | .set_candidate_normalization(true) 287 | .distance_and_ranges( 288 | parser.parse("e !"), 289 | " !I\\hh+\u{364}", 290 | &mut ranges, 291 | ) 292 | .unwrap(); 293 | 294 | assert_eq!(ranges, [7..9]); 295 | } 296 | 297 | #[test] 298 | fn fzf_v2_score_5() { 299 | let mut fzf = FzfV2::new(); 300 | 301 | let mut parser = FzfParser::new(); 302 | 303 | let mut ranges = Vec::new(); 304 | 305 | let _ = fzf 306 | .set_case_sensitivity(CaseSensitivity::Insensitive) 307 | .set_candidate_normalization(true) 308 | .distance_and_ranges(parser.parse("E"), "\u{364}E", &mut ranges) 309 | .unwrap(); 310 | 311 | assert_eq!(ranges, [0..2]); 312 | } 313 | 314 | #[test] 315 | fn fzf_v2_score_6() { 316 | let mut fzf = FzfV2::new(); 317 | 318 | let mut parser = FzfParser::new(); 319 | 320 | let mut ranges = Vec::new(); 321 | 322 | let query = parser.parse("!2\t\0\0\0WWHHWHWWWWWWWZ !I"); 323 | 324 | let distance = fzf 325 | 
.set_case_sensitivity(CaseSensitivity::Insensitive) 326 | .set_candidate_normalization(true) 327 | .distance_and_ranges( 328 | query, 329 | "\u{6}\0\0 N\u{364}\u{e}\u{365}+", 330 | &mut ranges, 331 | ); 332 | 333 | assert!(distance.is_none()); 334 | } 335 | --------------------------------------------------------------------------------