├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── snapcraft ├── .gitignore └── snapcraft.yaml └── src ├── bin ├── interactive.rs ├── main.rs ├── mod.rs └── opts.rs ├── lib.rs ├── score ├── config.rs └── mod.rs └── search └── mod.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | /Cargo.lock -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fzyr" 3 | version = "0.2.0" 4 | homepage = "https://github.com/jmaargh/fzyr" 5 | authors = ["jmaargh "] 6 | 7 | description = "A fuzzy(er) finder 🔎" 8 | repository = "https://github.com/jmaargh/fzyr" 9 | # TODO: 10 | # documentation = "" 11 | 12 | readme = "README.md" 13 | categories = ["algorithms", "command-line-utilities"] 14 | keywords = ["fuzzy", "finder", "find", "search"] 15 | license-file = "LICENSE" 16 | 17 | autobins = false 18 | 19 | 20 | [lib] 21 | name = "fzyr" 22 | 23 | 24 | [[bin]] 25 | name = "fzyr" 26 | path = "src/bin/main.rs" 27 | doc = false 28 | 29 | 30 | [dependencies] 31 | ndarray = "^0.11.2" 32 | itertools = "^0.7.8" 33 | crossbeam = "^0.4.1" 34 | bit-vec = "^0.5.0" 35 | clap = "^2.32.0" 36 | console = "^0.6.1" 37 | 38 | 39 | [profile.release] 40 | opt-level = 3 41 | debug = false 42 | lto = true 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright 2018 John-Mark Allen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 9 | of the Software, 
and to permit persons to whom the Software is furnished to do 10 | so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fzyr 2 | 3 | **fzyr** is a simple and fast fuzzy text search. It exists as both a Rust library and a standalone executable. 4 | 5 | Basically [fzy](https://github.com/jhawthorn/fzy) re-written in [Rust](https://www.rust-lang.org/). 6 | 7 | ## Why? 8 | 9 | `fzyr` exists because I wanted a fuzzy finder library while learning Rust. However, you may find it useful for your purposes. 10 | 11 | `fzyr` is very similar to `fzy`, so it inherits its advantages (at least as of Aug 2018). For most purposes it should be usable as a drop-in replacement. 12 | 13 | Advantages over `fzy`: 14 | + It works on Windows! Or at least it should, that's not actually been tested yet, let me know if it doesn't 🖥 15 | + It works with all Unicode strings! Hello, rest of the world 🗺️ 16 | + You can easily install with [Cargo](https://doc.rust-lang.org/stable/cargo/)! Cross-platform package management 📦 17 | + It's a Rust library! 
Use the algorithm in your own projects 😀 18 | 19 | Disadvantages compared to `fzy`: 20 | + It's less well tested 21 | + It doesn't support arbitrary tty i/o (only stdin/stdout) 22 | + Interactive mode needs more work 23 | 24 | ## Installation 25 | 26 | # [Cargo](https://doc.rust-lang.org/stable/cargo/) 27 | 28 | You can install on any supported platform using Cargo, Rust's excellent package 29 | manager. 30 | 31 | $ cargo install fzyr 32 | 33 | # Linux 34 | 35 | You can use Cargo or, if you'd prefer, install it as a [snap](https://snapcraft.io/) 36 | 37 | $ snap install fzyr 38 | 39 | # Homebrew 40 | 41 | Might arrive at some point... 42 | 43 | # Windows 44 | 45 | Use Cargo 46 | 47 | ## Usage 48 | 49 | Check out [fzy](https://github.com/jhawthorn/fzy#usage) for some usage examples. 50 | 51 | To search for lines containing "something" in a file: 52 | 53 | $ cat very-long-file | fzyr -q something 54 | 55 | To search interactively for a file: 56 | 57 | $ find . -type f | fzyr 58 | 59 | Explore the options with: 60 | 61 | $ fzyr -h 62 | 63 | ## Library documentation 64 | 65 | Coming soon... 66 | 67 | ## Algorithm 68 | 69 | The algorithm is near-identical to that of `fzy`. 
That means: 70 | + Search is case-insensitive (all characters are converted to their unicode-defined lowercase version, if one exists) 71 | + Results must contain the entire query string, in the right order, but without the letters necessarily being consecutive 72 | + Results are all given a numerical score, and returned in best-score-first order 73 | + Prefers consecutive characters and characters that start words/filenames 74 | + Prefers shorter results 75 | 76 | ## To-do list 77 | 78 | Feel free to make a PR if you're so moved 79 | + Improve interactive mode 80 | + Library documentation 81 | + Tests for `search_locate()` 82 | + Integration tests 83 | + Benchmarks 84 | + Package for various OSs 85 | + Zero-allocation search 86 | + Arbitrary tty i/o 87 | -------------------------------------------------------------------------------- /snapcraft/.gitignore: -------------------------------------------------------------------------------- 1 | /snap/ 2 | /parts/ 3 | /prime/ 4 | /stage/ 5 | 6 | fzyr_*_*.snap -------------------------------------------------------------------------------- /snapcraft/snapcraft.yaml: -------------------------------------------------------------------------------- 1 | name: fzyr 2 | version: 0.2.0 3 | summary: Fast fuzzy unicode text search 4 | description: | 5 | **fzyr** is a simple and fast fuzzy text search. It exists as both a Rust 6 | library and a standalone executable. Basically fzy re-written in Rust. 7 | 8 | confinement: strict 9 | 10 | grade: stable 11 | 12 | apps: 13 | fzyr: 14 | command: fzyr 15 | 16 | parts: 17 | fzyr: 18 | source: .. 
19 | plugin: rust 20 | build-attributes: [no-system-libraries] 21 | -------------------------------------------------------------------------------- /src/bin/interactive.rs: -------------------------------------------------------------------------------- 1 | extern crate console; 2 | 3 | use io; 4 | use std::io::Write; 5 | 6 | use self::console::{Key, Style, Term}; 7 | 8 | use fzyr::config::SCORE_MIN; 9 | use fzyr::{search_locate, LocateResult, LocateResults}; 10 | 11 | use super::opts; 12 | 13 | pub fn run(candidates: &[&str], options: &opts::Options) -> i32 { 14 | let mut terminal = Terminal::new(&options.prompt, options.show_scores, options.lines); 15 | 16 | if let Err(_) = terminal.run(candidates, options.parallelism) { 17 | eprintln!("Failed to write to stdout"); 18 | 1 19 | } else { 20 | 0 21 | } 22 | } 23 | 24 | struct Terminal<'a> { 25 | result_count: usize, 26 | max_display_width: usize, 27 | prompt: &'a str, 28 | show_scores: bool, 29 | drawn_lines: usize, 30 | term: Term, 31 | standout: Style, 32 | } 33 | 34 | impl<'a> Terminal<'a> { 35 | fn new(prompt: &'a str, show_scores: bool, max_results: usize) -> Self { 36 | let term = Term::stdout(); 37 | let size = term.size(); 38 | Self { 39 | result_count: max_results.min((size.0 as usize).saturating_sub(1)), 40 | max_display_width: size.1 as usize, 41 | prompt: prompt, 42 | show_scores: show_scores, 43 | drawn_lines: 0, 44 | term: term, 45 | standout: Style::new().reverse(), 46 | } 47 | } 48 | } 49 | 50 | impl<'a> Terminal<'a> { 51 | fn run(&mut self, candidates: &[&str], parallelism: usize) -> io::Result<()> { 52 | let mut query = String::with_capacity(opts::DEFLT_STRING_BUFFER_LEN); 53 | 54 | let mut should_search = true; 55 | loop { 56 | if should_search { 57 | let search_results = search_locate(&query, candidates, parallelism); 58 | self.draw(&query, candidates, &search_results)?; 59 | } 60 | 61 | should_search = match self.term.read_key()? 
{ 62 | Key::Char(ch) if ch == '\u{08}' || ch == '\u{7f}' => match query.pop() { 63 | // Backspace or delete 64 | Some(_) => true, 65 | None => false, 66 | }, 67 | Key::Char(ch) => { 68 | query.push(ch); 69 | true 70 | } 71 | _ => false, 72 | }; 73 | } 74 | } 75 | 76 | fn draw(&mut self, query: &str, candidates: &[&str], results: &LocateResults) -> io::Result<()> { 77 | self.clear()?; 78 | self.draw_query(query)?; 79 | self.draw_results(candidates, results)?; 80 | Ok(()) 81 | } 82 | 83 | fn clear(&mut self) -> io::Result<()> { 84 | self.term.clear_line()?; 85 | self.term.clear_last_lines(if self.drawn_lines > 1 { 86 | self.drawn_lines.checked_sub(1).unwrap_or(0) 87 | } else { 88 | self.drawn_lines 89 | })?; 90 | self.drawn_lines = 0; 91 | Ok(()) 92 | } 93 | 94 | fn draw_query(&mut self, query: &str) -> io::Result<()> { 95 | writeln!( 96 | self.term, 97 | "{}{}{}", 98 | self.prompt, 99 | query, 100 | self.standout.apply_to(" "), 101 | )?; 102 | self.drawn_lines += 1; 103 | Ok(()) 104 | } 105 | 106 | fn draw_results(&mut self, candidates: &[&str], results: &LocateResults) -> io::Result<()> { 107 | // Write the results 108 | let total_results = results.len().min(self.result_count); 109 | let mut line_count: usize = 0; 110 | for result in results.iter().take(total_results) { 111 | if line_count > 0 { 112 | self.term.write_line("")?; 113 | } 114 | self.draw_result(candidates, result)?; 115 | line_count += 1; 116 | self.drawn_lines += 1; 117 | } 118 | 119 | // Write empty lines for the rest 120 | while line_count < total_results { 121 | self.draw_empty()?; 122 | line_count += 1; 123 | self.drawn_lines += 1; 124 | } 125 | 126 | Ok(()) 127 | } 128 | 129 | fn draw_empty(&mut self) -> io::Result<()> { 130 | self.term.write_line("") 131 | } 132 | 133 | fn draw_result(&mut self, candidates: &[&str], result: &LocateResult) -> io::Result<()> { 134 | let mut spent_width = 0; 135 | 136 | if self.show_scores { 137 | if result.score == SCORE_MIN { 138 | write!(self.term, "( ) ")?; 
139 | } else { 140 | write!(self.term, "({:5.2}) ", result.score)?; 141 | } 142 | spent_width += 8; 143 | } 144 | 145 | let found = candidates[result.candidate_index]; 146 | for (i, ch) in found 147 | .chars() 148 | .take(self.max_display_width - spent_width) 149 | .enumerate() 150 | { 151 | if result.match_mask[i] { 152 | write!(self.term, "{}", self.standout.apply_to(ch))?; 153 | } else { 154 | write!(self.term, "{}", ch)?; 155 | } 156 | } 157 | 158 | Ok(()) 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /src/bin/main.rs: -------------------------------------------------------------------------------- 1 | extern crate fzyr; 2 | 3 | mod interactive; 4 | mod opts; 5 | 6 | use std::io; 7 | use std::process; 8 | 9 | use fzyr::config::SCORE_MIN; 10 | use fzyr::search_score; 11 | 12 | fn candidates_from_stdin() -> Vec { 13 | let stdin = io::stdin(); 14 | 15 | let mut out = Vec::new(); 16 | let mut buff = String::with_capacity(opts::DEFLT_STRING_BUFFER_LEN); 17 | while let Ok(bytes) = stdin.read_line(&mut buff) { 18 | if bytes == 0 { 19 | break; 20 | } 21 | out.push(buff.clone()); 22 | buff.clear(); 23 | } 24 | 25 | out 26 | } 27 | 28 | fn to_slices<'src>(strings: &'src Vec) -> Vec<&'src str> { 29 | strings 30 | .iter() 31 | .map(|s| s.trim()) 32 | .filter(|s| !s.is_empty()) 33 | .collect() 34 | } 35 | 36 | fn run() -> i32 { 37 | let options = opts::cmd_parse(); 38 | 39 | if options.benchmark > 0 && options.query.is_empty() { 40 | println!("To benchmark, provide a query with one of the -q/-e/--query/--show-matches flags"); 41 | return 1; 42 | } 43 | 44 | let candidates = candidates_from_stdin(); 45 | let candidates = to_slices(&candidates); 46 | 47 | if options.benchmark > 0 { 48 | // Run a benchmarking run without output 49 | for _ in 0..options.benchmark { 50 | search_score(&options.query, &candidates, options.parallelism); 51 | } 52 | 0 53 | } else if !options.query.is_empty() { 54 | // Run printing to stdout 55 
| let results = search_score(&options.query, &candidates, options.parallelism); 56 | for result in results.iter().take(options.lines) { 57 | if options.show_scores { 58 | if result.score == SCORE_MIN { 59 | print!("( ) "); 60 | } else { 61 | print!("({:5.2}) ", result.score); 62 | } 63 | println!("{}", candidates[result.candidate_index]); 64 | } 65 | } 66 | 0 67 | } else { 68 | // Run interactively 69 | interactive::run(&candidates, &options) 70 | } 71 | } 72 | 73 | fn main() { 74 | process::exit(run()); 75 | } 76 | -------------------------------------------------------------------------------- /src/bin/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod interactive; 2 | pub mod opts; 3 | -------------------------------------------------------------------------------- /src/bin/opts.rs: -------------------------------------------------------------------------------- 1 | extern crate clap; 2 | 3 | use self::clap::{App, Arg}; 4 | 5 | pub const NAME: &'static str = env!("CARGO_PKG_NAME"); 6 | pub const VERSION: &'static str = env!("CARGO_PKG_VERSION"); 7 | pub const WEBSITE: &'static str = env!("CARGO_PKG_HOMEPAGE"); 8 | pub const DESCRIPTION: &'static str = env!("CARGO_PKG_DESCRIPTION"); 9 | 10 | pub const DEFLT_STRING_BUFFER_LEN: usize = 128; 11 | 12 | #[derive(Debug)] 13 | pub struct Options { 14 | pub query: String, 15 | pub lines: usize, 16 | pub show_scores: bool, 17 | pub parallelism: usize, 18 | pub prompt: String, 19 | pub benchmark: usize, 20 | } 21 | 22 | impl Default for Options { 23 | fn default() -> Self { 24 | Self { 25 | query: String::new(), 26 | lines: 10, 27 | show_scores: false, 28 | parallelism: 4, 29 | prompt: "> ".to_string(), 30 | benchmark: 0, 31 | } 32 | } 33 | } 34 | 35 | pub fn cmd_parse() -> Options { 36 | let mut out = Options::default(); 37 | 38 | let deflt_query = out.query.to_string(); 39 | let deflt_lines = out.lines.to_string(); 40 | let deflt_parallelism = out.parallelism.to_string(); 
41 | let deflt_prompt = out.prompt.to_string(); 42 | let deflt_benchmark = out.benchmark.to_string(); 43 | 44 | let long_about: String = format!("{}\n[{}]", DESCRIPTION, WEBSITE); 45 | 46 | let matches = App::new(NAME) 47 | .version(VERSION) 48 | .about(DESCRIPTION) 49 | .long_about(long_about.as_ref()) 50 | .arg( 51 | Arg::with_name("query") 52 | .short("q") 53 | .long("query") 54 | .value_name("QUERY") 55 | .default_value(&deflt_query) 56 | .help("Query string to search for"), 57 | ) 58 | .arg( 59 | Arg::with_name("lines") 60 | .short("l") 61 | .long("lines") 62 | .value_name("LINES") 63 | .default_value(&deflt_lines) 64 | .help("Number of output lines to display"), 65 | ) 66 | .arg( 67 | Arg::with_name("show-scores") 68 | .short("s") 69 | .long("show-scores") 70 | .help("Show numerical scores for each match"), 71 | ) 72 | .arg( 73 | Arg::with_name("parallelism") 74 | .short("j") 75 | .long("parallelism") 76 | .value_name("THREADS") 77 | .default_value(&deflt_parallelism) 78 | .help("Maximum number of worker threads to use"), 79 | ) 80 | .arg( 81 | Arg::with_name("prompt") 82 | .short("p") 83 | .long("prompt") 84 | .value_name("PROMPT") 85 | .default_value(&deflt_prompt) 86 | .help("Propmt to show when entering queries"), 87 | ) 88 | .arg( 89 | Arg::with_name("benchmark") 90 | .short("b") 91 | .long("benchmark") 92 | .value_name("REPEATS") 93 | .default_value(&deflt_benchmark) 94 | .help("Set to a positive value to run that many repeated searches for benchmarking"), 95 | ) 96 | .arg( 97 | Arg::with_name("workers") 98 | .long("workers") 99 | .value_name("THREADS") 100 | .help("Identical to \"--parallelism\""), 101 | ) 102 | .arg( 103 | Arg::with_name("show-matches") 104 | .short("e") 105 | .long("show-matches") 106 | .value_name("QUERY") 107 | .help("Identical to \"--query\""), 108 | ) 109 | .get_matches(); 110 | 111 | out.query = if matches.is_present("query") { 112 | matches.value_of("query").unwrap().to_string() 113 | } else if 
/// Lowest possible score. Reported for an empty query or for a candidate that
/// exceeds `CANDIDATE_MAX_BYTES` / `CANDIDATE_MAX_CHARS`.
pub const SCORE_MIN: Score = f64::NEG_INFINITY;
/// Highest possible score. Reported when query and candidate contain the same
/// number of characters, which the scorer treats as an exact match.
pub const SCORE_MAX: Score = f64::INFINITY;

/// Penalty applied once per candidate character that precedes the match of
/// the first query character.
pub const SCORE_GAP_LEADING: Score = -0.005;
/// Penalty applied once per candidate character skipped after the match of a
/// query character other than the last one.
pub const SCORE_GAP_INNER: Score = -0.01;
/// Penalty applied once per candidate character that follows the match of the
/// last query character.
pub const SCORE_GAP_TRAILING: Score = -0.005;

/// Bonus for a match that directly follows the match of the previous query
/// character. It is used instead of (not in addition to) the positional
/// bonuses below.
pub const SCORE_MATCH_CONSECUTIVE: Score = 1.0;
/// Bonus for matching a character preceded by `/`. The first character of a
/// candidate is treated as if it were preceded by `/`.
pub const SCORE_MATCH_SLASH: Score = 0.9;
/// Bonus for matching a character preceded by a space, `-` or `_`.
pub const SCORE_MATCH_WORD: Score = 0.8;
/// Bonus for matching an uppercase character preceded by a lowercase one.
pub const SCORE_MATCH_CAPITAL: Score = 0.7;
/// Bonus for matching a character preceded by `.`.
pub const SCORE_MATCH_DOT: Score = 0.6;

/// Candidates longer than this many bytes are not scored (`SCORE_MIN`).
pub const CANDIDATE_MAX_BYTES: usize = 2048;
/// Candidates longer than this many characters are not scored (`SCORE_MIN`),
/// unless query and candidate have the same number of characters.
pub const CANDIDATE_MAX_CHARS: usize = 1024;
usize) -> Self { 34 | Self::with_score(candidate_index, SCORE_MIN) 35 | } 36 | 37 | pub fn with_score(candidate_index: usize, score: Score) -> Self { 38 | Self { 39 | candidate_index, 40 | score, 41 | } 42 | } 43 | } 44 | 45 | impl PartialOrd for ScoreResult { 46 | fn partial_cmp(&self, other: &Self) -> Option { 47 | Some( 48 | self 49 | .score 50 | .partial_cmp(&other.score) 51 | .unwrap_or(Ordering::Less) 52 | .reverse(), 53 | ) 54 | } 55 | } 56 | 57 | impl PartialEq for ScoreResult { 58 | fn eq(&self, other: &Self) -> bool { 59 | self.score == other.score 60 | } 61 | } 62 | 63 | impl LocateResult { 64 | pub fn new(candidate_index: usize, candidate_size: usize) -> Self { 65 | Self::with_score(candidate_index, candidate_size, SCORE_MIN) 66 | } 67 | 68 | pub fn with_score(candidate_index: usize, candidate_size: usize, score: Score) -> Self { 69 | Self { 70 | candidate_index, 71 | score: score, 72 | match_mask: BitVec::from_elem(candidate_size, false), 73 | } 74 | } 75 | } 76 | 77 | impl PartialOrd for LocateResult { 78 | fn partial_cmp(&self, other: &Self) -> Option { 79 | Some( 80 | self 81 | .score 82 | .partial_cmp(&other.score) 83 | .unwrap_or(Ordering::Less) 84 | .reverse(), 85 | ) 86 | } 87 | } 88 | 89 | impl PartialEq for LocateResult { 90 | fn eq(&self, other: &Self) -> bool { 91 | self.score == other.score 92 | } 93 | } 94 | 95 | /// Returns `true` if and only if `candidate` is a match for `query` 96 | /// 97 | /// A "match" must contain all of the letters of `query` in order, but not 98 | /// necessarily continguously. 
/// Returns `true` if and only if `candidate` is a match for `query`.
///
/// Every character of `query` must occur in `candidate` in the same order,
/// though not necessarily contiguously. Characters are compared by their
/// lowercase mappings, so the test is case-insensitive. An empty `query`
/// matches every candidate.
pub fn has_match(query: &str, candidate: &str) -> bool {
    // One shared cursor over the candidate: each query character resumes the
    // scan where the previous one stopped, which enforces the ordering.
    let mut remaining = candidate.chars();

    'query: for wanted in query.chars() {
        for seen in remaining.by_ref() {
            if seen.to_lowercase().eq(wanted.to_lowercase()) {
                continue 'query;
            }
        }
        // Candidate exhausted before this query character was found.
        return false;
    }

    true
}
LocateResult::with_score(index, candidate_chars, best_score_overall[[q_len - 1, c_len - 1]]); 148 | 149 | let mut query_iter = query.chars(); 150 | let mut cand_iter = candidate.chars(); 151 | // Safe because we'll return at the beginning for zero or unit length 152 | let mut i = q_len; 153 | let mut j = c_len; 154 | while query_iter.next_back() != None { 155 | i = i.wrapping_sub(1); 156 | while cand_iter.next_back() != None { 157 | j = j.wrapping_sub(1); 158 | if best_score_w_ending[[i, j]] != SCORE_MIN 159 | && best_score_w_ending[[i, j]] == best_score_overall[[i, j]] 160 | { 161 | // There's a match here that was on an optimal path 162 | out.match_mask.set(j, true); 163 | break; // Go to the next query letter 164 | } 165 | } 166 | } 167 | 168 | out 169 | } 170 | 171 | enum LengthsOrScore { 172 | Lengths(usize, usize), 173 | Score(self::Score), 174 | } 175 | 176 | fn get_lengths(query: &str, candidate: &str) -> LengthsOrScore { 177 | if candidate.len() > CANDIDATE_MAX_BYTES || query.len() == 0 { 178 | // Candidate too long or query too short 179 | return LengthsOrScore::Score(SCORE_MIN); 180 | } 181 | 182 | let q_len = query.chars().count(); 183 | let c_len = candidate.chars().count(); 184 | 185 | if q_len == c_len { 186 | // This is only called when there _is_ a match (candidate contains all 187 | // chars of query in the right order, so equal lengths mean equal 188 | // strings 189 | return LengthsOrScore::Score(SCORE_MAX); 190 | } 191 | 192 | if c_len > CANDIDATE_MAX_CHARS { 193 | // Too many characters 194 | return LengthsOrScore::Score(SCORE_MIN); 195 | } 196 | 197 | LengthsOrScore::Lengths(q_len, c_len) 198 | } 199 | 200 | fn score_internal( 201 | query: &str, 202 | candidate: &str, 203 | q_len: usize, 204 | c_len: usize, 205 | ) -> (ScoreMatrix, ScoreMatrix) { 206 | let match_bonuses = candidate_match_bonuses(candidate); 207 | 208 | // Matrix of the best score for each position ending in a match 209 | let mut best_score_w_ending = 
ScoreMatrix::zeros((q_len, c_len)); 210 | // Matrix for the best score for each position. 211 | let mut best_score_overall = ScoreMatrix::zeros((q_len, c_len)); 212 | 213 | for (i, q_char) in query.chars().enumerate() { 214 | let mut prev_score = SCORE_MIN; 215 | let gap_score = if i == q_len - 1 { 216 | SCORE_GAP_TRAILING 217 | } else { 218 | SCORE_GAP_INNER 219 | }; 220 | 221 | for (j, c_char) in candidate.chars().enumerate() { 222 | if q_char.to_lowercase().eq(c_char.to_lowercase()) { 223 | // Get the score bonus for matching this char 224 | let score = if i == 0 { 225 | // Beginning of the query, penalty for leading gap 226 | (j as f64 * SCORE_GAP_LEADING) + match_bonuses[j] 227 | } else if j != 0 { 228 | // Middle of both query and candidate 229 | // Either give it the match bonus, or use the consecutive 230 | // match (which wil always be higher, but doesn't stack 231 | // with match bonus) 232 | (best_score_overall[[i - 1, j - 1]] + match_bonuses[j]) 233 | .max(best_score_w_ending[[i - 1, j - 1]] + SCORE_MATCH_CONSECUTIVE) 234 | } else { 235 | SCORE_MIN 236 | }; 237 | 238 | prev_score = score.max(prev_score + gap_score); 239 | best_score_overall[[i, j]] = prev_score; 240 | best_score_w_ending[[i, j]] = score; 241 | } else { 242 | // Give the score penalty for the gap 243 | prev_score = prev_score + gap_score; 244 | best_score_overall[[i, j]] = prev_score; 245 | // We don't end in a match 246 | best_score_w_ending[[i, j]] = SCORE_MIN; 247 | } 248 | } 249 | } 250 | 251 | (best_score_overall, best_score_w_ending) 252 | } 253 | 254 | fn candidate_match_bonuses(candidate: &str) -> Vec { 255 | let mut prev_char = '/'; 256 | candidate 257 | .chars() 258 | .map(|current| { 259 | let s = character_match_bonus(current, prev_char); 260 | prev_char = current; 261 | s 262 | }) 263 | .collect() 264 | } 265 | 266 | fn character_match_bonus(current: char, previous: char) -> Score { 267 | if current.is_uppercase() && previous.is_lowercase() { 268 | SCORE_MATCH_CAPITAL 269 | 
/// Returns `true` for the characters treated as word separators: a space,
/// `-` and `_`.
fn is_separator(character: char) -> bool {
    [' ', '-', '_'].contains(&character)
}
БУКВА", 350 | "прописнаяБУКВА" 351 | )); 352 | assert!(!has_match( 353 | "БУКВА прописная", 354 | "прописная БУКВА" 355 | )); 356 | } 357 | 358 | #[test] 359 | fn score_pref_word_start() { 360 | assert!(score("amor", "app/models/order").score > score("amor", "app/models/zrder").score); 361 | assert!(score("amor", "app models-order").score > score("amor", "app models zrder").score); 362 | assert!(score("qart", "QuArTz").score > score("qart", "QuaRTz").score); 363 | } 364 | 365 | #[test] 366 | fn score_pref_consecutive_letters() { 367 | assert!(score("amo", "app/m/foo").score < score("amo", "app/models/foo").score); 368 | } 369 | 370 | #[test] 371 | fn score_pref_contiguous_vs_word() { 372 | assert!(score("gemfil", "Gemfile.lock").score < score("gemfil", "Gemfile").score); 373 | } 374 | 375 | #[test] 376 | fn score_pref_shorter() { 377 | assert!(score("abce", "abcdef").score > score("abce", "abc de").score); 378 | assert!(score("abc", " a b c ").score > score("abc", " a b c ").score); 379 | assert!(score("abc", " a b c ").score > score("abc", " a b c ").score); 380 | assert!(score("test", "tests").score > score("test", "testing").score); 381 | } 382 | 383 | #[test] 384 | fn score_prefer_start() { 385 | assert!(score("test", "testing").score > score("test", "/testing").score); 386 | } 387 | 388 | #[test] 389 | fn score_exact() { 390 | assert_eq!(SCORE_MAX, score("query", "query").score); 391 | assert_eq!( 392 | SCORE_MAX, 393 | score("156aufsdn926f9=sdk/~']", "156aufsdn926f9=sdk/~']").score 394 | ); 395 | assert_eq!( 396 | SCORE_MAX, 397 | score( 398 | "😨Ɣ·®x¯ÍĞ.ɅƁñîƹ♺àwÑ☆Lj😞´ƙºÑ♫", 399 | "😨Ɣ·®x¯ÍĞ.ɅƁñîƹ♺àwÑ☆Lj😞´ƙºÑ♫" 400 | ).score 401 | ); 402 | } 403 | 404 | #[test] 405 | fn score_empty() { 406 | assert_eq!(SCORE_MIN, score("", "").score); 407 | assert_eq!(SCORE_MIN, score("", "candidate").score); 408 | assert_eq!( 409 | SCORE_MIN, 410 | score( 411 | "", 412 | "😨Ɣ·®x¯ÍĞ.ɅƁñîƹ♺àwÑ☆Lj😞´ƙºÑ♫" 413 | ).score 414 | ); 415 | assert_eq!(SCORE_MIN, score("", "прописная 
БУКВА").score); 416 | assert_eq!(SCORE_MIN, score("", "a").score); 417 | assert_eq!(SCORE_MIN, score("", "4561").score); 418 | } 419 | 420 | #[test] 421 | fn score_gaps() { 422 | assert_eq!(SCORE_GAP_LEADING, score("a", "*a").score); 423 | assert_eq!(SCORE_GAP_LEADING * 2.0, score("a", "*ba").score); 424 | assert_eq!( 425 | SCORE_GAP_LEADING * 2.0 + SCORE_GAP_TRAILING, 426 | score("a", "**a*").score 427 | ); 428 | assert_eq!( 429 | SCORE_GAP_LEADING * 2.0 + SCORE_GAP_TRAILING * 2.0, 430 | score("a", "**a**").score 431 | ); 432 | assert_eq!( 433 | SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_CONSECUTIVE + SCORE_GAP_TRAILING * 2.0, 434 | score("aa", "**aa♺*").score 435 | ); 436 | assert_eq!( 437 | SCORE_GAP_LEADING * 2.0 + SCORE_GAP_INNER + SCORE_MATCH_WORD + SCORE_GAP_TRAILING * 2.0, 438 | score("ab", "**a-b♺*").score 439 | ); 440 | assert_eq!( 441 | SCORE_GAP_LEADING 442 | + SCORE_GAP_LEADING 443 | + SCORE_GAP_INNER 444 | + SCORE_GAP_TRAILING 445 | + SCORE_GAP_TRAILING, 446 | score("aa", "**a♺a**").score 447 | ); 448 | } 449 | 450 | #[test] 451 | fn score_consecutive() { 452 | assert_eq!( 453 | SCORE_GAP_LEADING + SCORE_MATCH_CONSECUTIVE, 454 | score("aa", "*aa").score 455 | ); 456 | assert_eq!( 457 | SCORE_GAP_LEADING + SCORE_MATCH_CONSECUTIVE * 2.0, 458 | score("aaa", "♫aaa").score 459 | ); 460 | assert_eq!( 461 | SCORE_GAP_LEADING + SCORE_GAP_INNER + SCORE_MATCH_CONSECUTIVE, 462 | score("aaa", "*a*aa").score 463 | ); 464 | } 465 | 466 | #[test] 467 | fn score_slash() { 468 | assert_eq!( 469 | SCORE_GAP_LEADING + SCORE_MATCH_SLASH, 470 | score("a", "/a").score 471 | ); 472 | assert_eq!( 473 | SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_SLASH, 474 | score("a", "*/a").score 475 | ); 476 | assert_eq!( 477 | SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_SLASH + SCORE_MATCH_CONSECUTIVE, 478 | score("aa", "a/aa").score 479 | ); 480 | } 481 | 482 | #[test] 483 | fn score_capital() { 484 | assert_eq!( 485 | SCORE_GAP_LEADING + SCORE_MATCH_CAPITAL, 486 | score("a", "bA").score 487 | ); 488 | 
assert_eq!( 489 | SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_CAPITAL, 490 | score("a", "baA").score 491 | ); 492 | assert_eq!( 493 | SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_CAPITAL + SCORE_MATCH_CONSECUTIVE, 494 | score("aa", "😞aAa").score 495 | ); 496 | } 497 | 498 | #[test] 499 | fn score_dot() { 500 | assert_eq!(SCORE_GAP_LEADING + SCORE_MATCH_DOT, score("a", ".a").score); 501 | assert_eq!( 502 | SCORE_GAP_LEADING * 3.0 + SCORE_MATCH_DOT, 503 | score("a", "*a.a").score 504 | ); 505 | assert_eq!( 506 | SCORE_GAP_LEADING + SCORE_GAP_INNER + SCORE_MATCH_DOT, 507 | score("a", "♫a.a").score 508 | ); 509 | } 510 | 511 | fn assert_locate_score(query: &str, candidate: &str, score: Score) { 512 | let result = locate(query, candidate); 513 | 514 | assert_eq!(score, result.score); 515 | } 516 | 517 | #[test] 518 | fn locate_exact() { 519 | assert_locate_score("query", "query", SCORE_MAX); 520 | assert_locate_score("156aufsdn926f9=sdk/~']", 521 | "156aufsdn926f9=sdk/~']", 522 | SCORE_MAX, 523 | ); 524 | assert_locate_score( 525 | "😨Ɣ·®x¯ÍĞ.ɅƁñîƹ♺àwÑ☆Lj😞´ƙºÑ♫", 526 | "😨Ɣ·®x¯ÍĞ.ɅƁñîƹ♺àwÑ☆Lj😞´ƙºÑ♫", 527 | SCORE_MAX, 528 | ); 529 | } 530 | 531 | #[test] 532 | fn locate_empty() { 533 | assert_locate_score("", "", SCORE_MIN); 534 | assert_locate_score("", "candidate", SCORE_MIN); 535 | assert_locate_score( 536 | "", 537 | "😨Ɣ·®x¯ÍĞ.ɅƁñîƹ♺àwÑ☆Lj😞´ƙºÑ♫, ", 538 | SCORE_MIN, 539 | ); 540 | assert_locate_score("", "прописная БУКВА", SCORE_MIN); 541 | assert_locate_score("", "a", SCORE_MIN); 542 | assert_locate_score("", "4561", SCORE_MIN); 543 | } 544 | 545 | #[test] 546 | fn locate_gaps() { 547 | assert_locate_score("a", "*a", SCORE_GAP_LEADING); 548 | assert_locate_score("a", "*ba", SCORE_GAP_LEADING * 2.0); 549 | assert_locate_score("a", "**a*", 550 | SCORE_GAP_LEADING * 2.0 + SCORE_GAP_TRAILING, 551 | ); 552 | assert_locate_score("a", "**a**", 553 | SCORE_GAP_LEADING * 2.0 + SCORE_GAP_TRAILING * 2.0, 554 | ); 555 | assert_locate_score("aa", "**aa♺*", 556 | SCORE_GAP_LEADING * 2.0 + 
SCORE_MATCH_CONSECUTIVE + SCORE_GAP_TRAILING * 2.0, 557 | ); 558 | assert_locate_score("ab", "**a-b♺*", 559 | SCORE_GAP_LEADING * 2.0 + SCORE_GAP_INNER + SCORE_MATCH_WORD + SCORE_GAP_TRAILING * 2.0, 560 | ); 561 | assert_locate_score("aa", "**a♺a**", 562 | SCORE_GAP_LEADING 563 | + SCORE_GAP_LEADING 564 | + SCORE_GAP_INNER 565 | + SCORE_GAP_TRAILING 566 | + SCORE_GAP_TRAILING, 567 | ); 568 | } 569 | 570 | #[test] 571 | fn locate_consecutive() { 572 | assert_locate_score("aa", "*aa", 573 | SCORE_GAP_LEADING + SCORE_MATCH_CONSECUTIVE, 574 | ); 575 | assert_locate_score("aaa", "♫aaa", 576 | SCORE_GAP_LEADING + SCORE_MATCH_CONSECUTIVE * 2.0, 577 | ); 578 | assert_locate_score("aaa", "*a*aa", 579 | SCORE_GAP_LEADING + SCORE_GAP_INNER + SCORE_MATCH_CONSECUTIVE, 580 | ); 581 | } 582 | 583 | #[test] 584 | fn locate_slash() { 585 | assert_locate_score("a", "/a", 586 | SCORE_GAP_LEADING + SCORE_MATCH_SLASH, 587 | ); 588 | assert_locate_score("a", "*/a", 589 | SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_SLASH, 590 | ); 591 | assert_locate_score("aa", "a/aa", 592 | SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_SLASH + SCORE_MATCH_CONSECUTIVE, 593 | ); 594 | } 595 | 596 | #[test] 597 | fn locate_capital() { 598 | assert_locate_score("a", "bA", 599 | SCORE_GAP_LEADING + SCORE_MATCH_CAPITAL, 600 | ); 601 | assert_locate_score("a", "baA", 602 | SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_CAPITAL, 603 | ); 604 | assert_locate_score("aa", "😞aAa", 605 | SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_CAPITAL + SCORE_MATCH_CONSECUTIVE, 606 | ); 607 | } 608 | 609 | #[test] 610 | fn locate_dot() { 611 | assert_locate_score("a", ".a", SCORE_GAP_LEADING + SCORE_MATCH_DOT); 612 | assert_locate_score("a", "*a.a", 613 | SCORE_GAP_LEADING * 3.0 + SCORE_MATCH_DOT, 614 | ); 615 | assert_locate_score("a", "♫a.a", 616 | SCORE_GAP_LEADING + SCORE_GAP_INNER + SCORE_MATCH_DOT, 617 | ); 618 | } 619 | 620 | } 621 | -------------------------------------------------------------------------------- /src/search/mod.rs: 
-------------------------------------------------------------------------------- 1 | extern crate crossbeam; 2 | extern crate itertools; 3 | 4 | use std::cmp::Ordering; 5 | use std::usize; 6 | 7 | use self::crossbeam::channel; 8 | use self::crossbeam::scope as thread_scope; 9 | use self::itertools::kmerge; 10 | 11 | use score::{has_match, locate_inner, score_inner, LocateResult, ScoreResult}; 12 | 13 | /// Collection of scores and the candidates they apply to 14 | pub type ScoreResults = Vec; 15 | /// Collection of scores, locations, and the candidates they apply to 16 | pub type LocateResults = Vec; 17 | 18 | /// Search among a collection of candidates using the given query, returning 19 | /// an ordered collection of results (highest score first) 20 | pub fn search_score( 21 | query: &str, 22 | candidates: &[&str], 23 | parallelism: usize, 24 | ) -> ScoreResults { 25 | search_internal(query, candidates, parallelism, score_inner).collect() 26 | } 27 | 28 | /// Search among a collection of candidates using the given query, returning 29 | /// an ordered collection of results (highest score first) with the locations 30 | /// of the query in each candidate 31 | pub fn search_locate( 32 | query: &str, 33 | candidates: &[&str], 34 | parallelism: usize, 35 | ) -> LocateResults { 36 | search_internal(query, candidates, parallelism, locate_inner).collect() 37 | } 38 | 39 | fn search_internal( 40 | query: &str, 41 | candidates: &[&str], 42 | parallelism: usize, 43 | search_fn: fn(&str, &str, usize) -> T, 44 | ) -> Box> 45 | where 46 | T: PartialOrd + Sized + Send + 'static, 47 | { 48 | let parallelism = calculate_parallelism(candidates.len(), parallelism, query.is_empty()); 49 | let mut candidates = candidates; 50 | let (sender, receiver) = channel::bounded::>(parallelism); 51 | 52 | if parallelism < 2 { 53 | Box::new(search_worker(candidates, query, 0, search_fn).into_iter()) 54 | } else { 55 | thread_scope(|scope| { 56 | let mut remaining_candidates = candidates.len(); 57 
| let per_thread_count = ceil_div(remaining_candidates, parallelism); 58 | let mut thread_offset = 0; 59 | 60 | // Create "parallelism" threads 61 | while remaining_candidates > 0 { 62 | // Search in this thread's share 63 | let split = if remaining_candidates >= per_thread_count { 64 | remaining_candidates -= per_thread_count; 65 | per_thread_count 66 | } else { 67 | remaining_candidates = 0; 68 | remaining_candidates 69 | }; 70 | let split = candidates.split_at(split); 71 | let splitted_len = split.0.len(); 72 | let sender = sender.clone(); 73 | scope.spawn(move || { 74 | sender.send(search_worker(split.0, query, thread_offset, search_fn)); 75 | }); 76 | thread_offset += splitted_len; 77 | 78 | // Remove that share from the candidate slice 79 | candidates = split.1; 80 | } 81 | 82 | drop(sender); 83 | }); 84 | 85 | Box::new(kmerge(receiver)) 86 | } 87 | } 88 | 89 | // Search among candidates against a query in a single thread 90 | fn search_worker( 91 | candidates: &[&str], 92 | query: &str, 93 | offset_index: usize, 94 | search_fn: fn(&str, &str, usize) -> T 95 | ) -> Vec 96 | where 97 | T: PartialOrd, 98 | { 99 | let mut out = Vec::with_capacity(candidates.len()); 100 | for (index, candidate) in candidates.into_iter().enumerate() { 101 | if has_match(&query, candidate) { 102 | out.push(search_fn(&query, candidate, offset_index + index)); 103 | } 104 | } 105 | out.sort_unstable_by(|result1, result2| result1.partial_cmp(result2).unwrap_or(Ordering::Less)); 106 | 107 | out 108 | } 109 | 110 | fn calculate_parallelism( 111 | candidate_count: usize, 112 | configured_parallelism: usize, 113 | empty_query: bool, 114 | ) -> usize { 115 | if empty_query { 116 | // No need to do much for no query 117 | return 1; 118 | } 119 | 120 | // Use a ramp up to avoid unecessarily starting threads with few candidates 121 | let ramped_parallelism = match candidate_count { 122 | n if n < 17 => ceil_div(n, 4), 123 | n if n > 32 => ceil_div(n, 8), 124 | _ => 4, 125 | }; 126 | 127 | 
configured_parallelism 128 | .min(ramped_parallelism) 129 | .min(candidate_count) 130 | .max(1) 131 | } 132 | 133 | /// Integer ceiling division 134 | fn ceil_div(a: usize, b: usize) -> usize { 135 | (a + b - 1) / b 136 | } 137 | 138 | #[cfg(test)] 139 | mod tests { 140 | use super::*; 141 | 142 | #[test] 143 | fn parallelism_ramp() { 144 | assert_eq!(1, calculate_parallelism(0, 0, false)); 145 | assert_eq!(1, calculate_parallelism(1, 0, false)); 146 | assert_eq!(1, calculate_parallelism(0, 1, false)); 147 | assert_eq!(1, calculate_parallelism(1, 1, false)); 148 | 149 | assert_eq!(1, calculate_parallelism(2, usize::MAX, false)); 150 | assert_eq!(1, calculate_parallelism(3, 4, false)); 151 | assert_eq!(1, calculate_parallelism(4, 2, false)); 152 | 153 | for n in 5..9 { 154 | assert_eq!(2, calculate_parallelism(n, usize::MAX, false)); 155 | assert_eq!(1, calculate_parallelism(n, usize::MAX, true)); 156 | } 157 | 158 | for n in 9..13 { 159 | assert_eq!(3, calculate_parallelism(n, usize::MAX, false)); 160 | assert_eq!(1, calculate_parallelism(n, usize::MAX, true)); 161 | } 162 | 163 | for n in 13..33 { 164 | assert_eq!(4, calculate_parallelism(n, usize::MAX, false)); 165 | assert_eq!(1, calculate_parallelism(n, usize::MAX, true)); 166 | } 167 | 168 | for n in 1..10_000 { 169 | assert!(calculate_parallelism(n, 12, false) <= 12); 170 | assert_eq!(1, calculate_parallelism(n, 12, true)); 171 | } 172 | } 173 | 174 | fn search_empty_with_parallelism(parallelism: usize) { 175 | let rs = search_score("", &[], parallelism); 176 | assert_eq!(0, rs.len()); 177 | 178 | let rs = search_score("test", &[], parallelism); 179 | assert_eq!(0, rs.len()); 180 | } 181 | 182 | fn search_with_parallelism(parallelism: usize) { 183 | search_empty_with_parallelism(parallelism); 184 | 185 | let rs = search_score("", &["tags"], parallelism); 186 | assert_eq!(1, rs.len()); 187 | assert_eq!(0, rs[0].candidate_index); 188 | 189 | let rs = search_score("♺", &["ñîƹ♺à"], parallelism); 190 | 
assert_eq!(1, rs.len()); 191 | assert_eq!(0, rs[0].candidate_index); 192 | 193 | let cs = &["tags", "test"]; 194 | 195 | let rs = search_score("", cs, parallelism); 196 | assert_eq!(2, rs.len()); 197 | 198 | let rs = search_score("te", cs, parallelism); 199 | assert_eq!(1, rs.len()); 200 | assert_eq!(1, rs[0].candidate_index); 201 | 202 | let rs = search_score("foobar", cs, parallelism); 203 | assert_eq!(0, rs.len()); 204 | 205 | let rs = search_score("ts", cs, parallelism); 206 | assert_eq!(2, rs.len()); 207 | assert_eq!( 208 | vec![1, 0], 209 | rs.iter().map(|r| r.candidate_index).collect::>() 210 | ); 211 | } 212 | 213 | fn search_med_parallelism(parallelism: usize) { 214 | let cs = &[ 215 | "one", 216 | "two", 217 | "three", 218 | "four", 219 | "five", 220 | "six", 221 | "seven", 222 | "eight", 223 | "nine", 224 | "ten", 225 | "eleven", 226 | "twelve", 227 | "thirteen", 228 | "fourteen", 229 | "fifteen", 230 | "sixteen", 231 | "seventeen", 232 | "eighteen", 233 | "nineteen", 234 | "twenty", 235 | ]; 236 | 237 | let rs = search_score("", cs, parallelism); 238 | assert_eq!(cs.len(), rs.len()); 239 | 240 | let rs = search_score("teen", cs, parallelism); 241 | assert_eq!(7, rs.len()); 242 | for r in rs { 243 | assert_eq!( 244 | "neet", 245 | cs[r.candidate_index].chars().rev().take(4).collect::() 246 | ); 247 | } 248 | 249 | let rs = search_score("tee", cs, parallelism); 250 | assert_eq!(9, rs.len()); 251 | assert_eq!( 252 | "neet", 253 | cs[rs[0].candidate_index].chars().rev().take(4).collect::() 254 | ); 255 | 256 | let rs = search_score("six", cs, parallelism); 257 | assert_eq!("six", cs[rs[0].candidate_index]); 258 | } 259 | 260 | fn search_large_parallelism(parallelism: usize) { 261 | let n = 100_000; 262 | let mut candidates = Vec::with_capacity(n); 263 | for i in 0..n { 264 | candidates.push(format!("{}", i)); 265 | } 266 | 267 | let rs = search_score( 268 | "12", 269 | &(candidates.iter().map(|s| &s[..]).collect::>()), 270 | parallelism, 271 | ); 272 | 273 
| // This has been precalculated 274 | // e.g. via `$ seq 0 99999 | grep '.*1.*2.*' | wc -l` 275 | assert_eq!(8146, rs.len()); 276 | assert_eq!("12", candidates[rs[0].candidate_index]); 277 | } 278 | 279 | // TODO: test locate 280 | 281 | #[test] 282 | fn search_single() { 283 | search_with_parallelism(0); 284 | search_with_parallelism(1); 285 | search_large_parallelism(1); 286 | } 287 | 288 | #[test] 289 | fn search_double() { 290 | search_with_parallelism(2); 291 | search_large_parallelism(2); 292 | } 293 | 294 | #[test] 295 | fn search_quad() { 296 | search_med_parallelism(4); 297 | search_large_parallelism(4); 298 | } 299 | 300 | #[test] 301 | fn search_quin() { 302 | search_med_parallelism(4); 303 | search_large_parallelism(5); 304 | } 305 | 306 | #[test] 307 | fn search_large() { 308 | search_med_parallelism(4); 309 | search_large_parallelism(16); 310 | } 311 | } 312 | --------------------------------------------------------------------------------