├── .gitignore
├── Cargo.toml
├── LICENSE
├── README.md
├── snapcraft
    ├── .gitignore
    └── snapcraft.yaml
└── src
    ├── bin
        ├── interactive.rs
        ├── main.rs
        ├── mod.rs
        └── opts.rs
    ├── lib.rs
    ├── score
        ├── config.rs
        └── mod.rs
    └── search
        └── mod.rs


/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | **/*.rs.bk
3 | /Cargo.lock


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "fzyr"
 3 | version = "0.2.0"
 4 | homepage = "https://github.com/jmaargh/fzyr"
 5 | authors = ["jmaargh <https://github.com/jmaargh>"]
 6 | 
 7 | description = "A fuzzy(er) finder 🔎"
 8 | repository = "https://github.com/jmaargh/fzyr"
 9 | # TODO:
10 | # documentation = ""
11 | 
12 | readme = "README.md"
13 | categories = ["algorithms", "command-line-utilities"]
14 | keywords = ["fuzzy", "finder", "find", "search"]
15 | license-file = "LICENSE"
16 | 
17 | autobins = false
18 | 
19 | 
20 | [lib]
21 | name = "fzyr"
22 | 
23 | 
24 | [[bin]]
25 | name = "fzyr"
26 | path = "src/bin/main.rs"
27 | doc = false
28 | 
29 | 
30 | [dependencies]
31 | ndarray = "^0.11.2"
32 | itertools = "^0.7.8"
33 | crossbeam = "^0.4.1"
34 | bit-vec = "^0.5.0"
35 | clap = "^2.32.0"
36 | console = "^0.6.1"
37 | 
38 | 
39 | [profile.release]
40 | opt-level = 3
41 | debug = false
42 | lto = true
43 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright 2018 John-Mark Allen
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 6 | this software and associated documentation files (the "Software"), to deal in
 7 | the Software without restriction, including without limitation the rights to
 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 9 | of the Software, and to permit persons to whom the Software is furnished to do
10 | so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # fzyr
 2 | 
 3 | **fzyr** is a simple and fast fuzzy text search. It exists as both a Rust library and a standalone executable.
 4 | 
 5 | Basically [fzy](https://github.com/jhawthorn/fzy) re-written in [Rust](https://www.rust-lang.org/).
 6 | 
 7 | ## Why?
 8 | 
 9 | `fzyr` exists because I wanted a fuzzy finder library while learning Rust. However, you may find it useful for your own purposes.
10 | 
11 | `fzyr` is very similar to `fzy`, so inherits its advantages (at least as of Aug 2018). For most purposes it should be usable as a drop-in replacement.
12 | 
13 | Advantages over `fzy`:
14 | + It works on Windows! Or at least it should; that hasn't actually been tested yet, so let me know if it doesn't 🖥
15 | + It works with all unicode strings! Hello, rest of the world 🗺️
16 | + You can easily install with [Cargo](https://doc.rust-lang.org/stable/cargo/)! Cross-platform package management 📦
17 | + It's a Rust library! Use the algorithm in your own projects 😀
18 | 
19 | Disadvantages compared to `fzy`:
20 | + It's less-well tested
21 | + It doesn't support arbitrary tty i/o (only stdin/stdout)
22 | + Interactive mode needs more work
23 | 
24 | ## Installation
25 | 
26 | ### [Cargo](https://doc.rust-lang.org/stable/cargo/)
27 | 
28 | You can install on any supported platform using Cargo, Rust's excellent package
29 | manager.
30 | 
31 |     $ cargo install fzyr
32 | 
33 | ### Linux
34 | 
35 | You can use Cargo, or if you'd prefer install as a [snap](https://snapcraft.io/)
36 | 
37 |     $ snap install fzyr
38 | 
39 | ### Homebrew
40 | 
41 | Might arrive at some point...
42 | 
43 | ### Windows
44 | 
45 | Use Cargo
46 | 
47 | ## Usage
48 | 
49 | Check out [fzy](https://github.com/jhawthorn/fzy#usage) for some usage examples.
50 | 
51 | To search for lines containing "something" in a file:
52 | 
53 |     $ cat very-long-file | fzyr -q something
54 | 
55 | To search interactively for a file:
56 | 
57 |     $ find . -type f | fzyr
58 | 
59 | Explore the options with:
60 | 
61 |     $ fzyr -h
62 | 
63 | ## Library documentation
64 | 
65 | Coming soon...
66 | 
67 | ## Algorithm
68 | 
69 | The algorithm is near-identical to that of `fzy`. That means:
70 | + Search is case-insensitive (all characters are converted to their unicode-defined lowercase version, if one exists)
71 | + Results must contain the entire query string, in the right order, but without the letters necessarily being consecutive
72 | + Results are all given a numerical score, and returned in best-score-first order
73 | + Prefers consecutive characters and characters that start words/filenames
74 | + Prefers shorter results
75 | 
76 | ## To-do list
77 | 
78 | Feel free to make a PR if you're so moved
79 | + Improve interactive mode
80 | + Library documentation
81 | + Tests for `search_locate()`
82 | + Integration tests
83 | + Benchmarks
84 | + Package for various OSs
85 | + Zero-allocation search
86 | + Arbitrary tty i/o
87 | 


--------------------------------------------------------------------------------
/snapcraft/.gitignore:
--------------------------------------------------------------------------------
1 | /snap/
2 | /parts/
3 | /prime/
4 | /stage/
5 | 
6 | fzyr_*_*.snap


--------------------------------------------------------------------------------
/snapcraft/snapcraft.yaml:
--------------------------------------------------------------------------------
 1 | name: fzyr
 2 | version: 0.2.0
 3 | summary: Fast fuzzy unicode text search
 4 | description: |
 5 |   **fzyr** is a simple and fast fuzzy text search. It exists as both a Rust 
 6 |   library and a standalone executable. Basically fzy re-written in Rust.
 7 | 
 8 | confinement: strict
 9 | 
10 | grade: stable
11 | 
12 | apps:
13 |   fzyr:
14 |     command: fzyr
15 | 
16 | parts:
17 |   fzyr:
18 |     source: ..
19 |     plugin: rust
20 |     build-attributes: [no-system-libraries]
21 | 


--------------------------------------------------------------------------------
/src/bin/interactive.rs:
--------------------------------------------------------------------------------
  1 | extern crate console;
  2 | 
  3 | use io;
  4 | use std::io::Write;
  5 | 
  6 | use self::console::{Key, Style, Term};
  7 | 
  8 | use fzyr::config::SCORE_MIN;
  9 | use fzyr::{search_locate, LocateResult, LocateResults};
 10 | 
 11 | use super::opts;
 12 | 
 13 | pub fn run(candidates: &[&str], options: &opts::Options) -> i32 {
 14 |   let mut terminal = Terminal::new(&options.prompt, options.show_scores, options.lines);
 15 | 
 16 |   if let Err(_) = terminal.run(candidates, options.parallelism) {
 17 |     eprintln!("Failed to write to stdout");
 18 |     1
 19 |   } else {
 20 |     0
 21 |   }
 22 | }
 23 | 
/// State of the interactive display: the terminal handle, how much may be
/// drawn, and how much has been drawn since the last clear
struct Terminal<'a> {
  /// Maximum number of result lines drawn below the prompt line
  result_count: usize,
  /// Upper bound on the characters written for one result line (score prefix
  /// included)
  max_display_width: usize,
  /// Text written in front of the query on the prompt line
  prompt: &'a str,
  /// Whether every result line is prefixed with its score
  show_scores: bool,
  /// Number of lines written since the display was last cleared
  drawn_lines: usize,
  /// Terminal used for all key reads and writes
  term: Term,
  /// Style used for the cursor block and for matched characters
  standout: Style,
}
 33 | 
 34 | impl<'a> Terminal<'a> {
 35 |   fn new(prompt: &'a str, show_scores: bool, max_results: usize) -> Self {
 36 |     let term = Term::stdout();
 37 |     let size = term.size();
 38 |     Self {
 39 |       result_count: max_results.min((size.0 as usize).saturating_sub(1)),
 40 |       max_display_width: size.1 as usize,
 41 |       prompt: prompt,
 42 |       show_scores: show_scores,
 43 |       drawn_lines: 0,
 44 |       term: term,
 45 |       standout: Style::new().reverse(),
 46 |     }
 47 |   }
 48 | }
 49 | 
 50 | impl<'a> Terminal<'a> {
 51 |   fn run(&mut self, candidates: &[&str], parallelism: usize) -> io::Result<()> {
 52 |     let mut query = String::with_capacity(opts::DEFLT_STRING_BUFFER_LEN);
 53 | 
 54 |     let mut should_search = true;
 55 |     loop {
 56 |       if should_search {
 57 |         let search_results = search_locate(&query, candidates, parallelism);
 58 |         self.draw(&query, candidates, &search_results)?;
 59 |       }
 60 | 
 61 |       should_search = match self.term.read_key()? {
 62 |         Key::Char(ch) if ch == '\u{08}' || ch == '\u{7f}' => match query.pop() {
 63 |           // Backspace or delete
 64 |           Some(_) => true,
 65 |           None => false,
 66 |         },
 67 |         Key::Char(ch) => {
 68 |           query.push(ch);
 69 |           true
 70 |         }
 71 |         _ => false,
 72 |       };
 73 |     }
 74 |   }
 75 | 
 76 |   fn draw(&mut self, query: &str, candidates: &[&str], results: &LocateResults) -> io::Result<()> {
 77 |     self.clear()?;
 78 |     self.draw_query(query)?;
 79 |     self.draw_results(candidates, results)?;
 80 |     Ok(())
 81 |   }
 82 | 
 83 |   fn clear(&mut self) -> io::Result<()> {
 84 |     self.term.clear_line()?;
 85 |     self.term.clear_last_lines(if self.drawn_lines > 1 {
 86 |       self.drawn_lines.checked_sub(1).unwrap_or(0)
 87 |     } else {
 88 |       self.drawn_lines
 89 |     })?;
 90 |     self.drawn_lines = 0;
 91 |     Ok(())
 92 |   }
 93 | 
 94 |   fn draw_query(&mut self, query: &str) -> io::Result<()> {
 95 |     writeln!(
 96 |       self.term,
 97 |       "{}{}{}",
 98 |       self.prompt,
 99 |       query,
100 |       self.standout.apply_to(" "),
101 |     )?;
102 |     self.drawn_lines += 1;
103 |     Ok(())
104 |   }
105 | 
106 |   fn draw_results(&mut self, candidates: &[&str], results: &LocateResults) -> io::Result<()> {
107 |     // Write the results
108 |     let total_results = results.len().min(self.result_count);
109 |     let mut line_count: usize = 0;
110 |     for result in results.iter().take(total_results) {
111 |       if line_count > 0 {
112 |         self.term.write_line("")?;
113 |       }
114 |       self.draw_result(candidates, result)?;
115 |       line_count += 1;
116 |       self.drawn_lines += 1;
117 |     }
118 | 
119 |     // Write empty lines for the rest
120 |     while line_count < total_results {
121 |       self.draw_empty()?;
122 |       line_count += 1;
123 |       self.drawn_lines += 1;
124 |     }
125 | 
126 |     Ok(())
127 |   }
128 | 
129 |   fn draw_empty(&mut self) -> io::Result<()> {
130 |     self.term.write_line("")
131 |   }
132 | 
133 |   fn draw_result(&mut self, candidates: &[&str], result: &LocateResult) -> io::Result<()> {
134 |     let mut spent_width = 0;
135 | 
136 |     if self.show_scores {
137 |       if result.score == SCORE_MIN {
138 |         write!(self.term, "(     ) ")?;
139 |       } else {
140 |         write!(self.term, "({:5.2}) ", result.score)?;
141 |       }
142 |       spent_width += 8;
143 |     }
144 | 
145 |     let found = candidates[result.candidate_index];
146 |     for (i, ch) in found
147 |       .chars()
148 |       .take(self.max_display_width - spent_width)
149 |       .enumerate()
150 |     {
151 |       if result.match_mask[i] {
152 |         write!(self.term, "{}", self.standout.apply_to(ch))?;
153 |       } else {
154 |         write!(self.term, "{}", ch)?;
155 |       }
156 |     }
157 | 
158 |     Ok(())
159 |   }
160 | }
161 | 


--------------------------------------------------------------------------------
/src/bin/main.rs:
--------------------------------------------------------------------------------
 1 | extern crate fzyr;
 2 | 
 3 | mod interactive;
 4 | mod opts;
 5 | 
 6 | use std::io;
 7 | use std::process;
 8 | 
 9 | use fzyr::config::SCORE_MIN;
10 | use fzyr::search_score;
11 | 
12 | fn candidates_from_stdin() -> Vec<String> {
13 |   let stdin = io::stdin();
14 | 
15 |   let mut out = Vec::new();
16 |   let mut buff = String::with_capacity(opts::DEFLT_STRING_BUFFER_LEN);
17 |   while let Ok(bytes) = stdin.read_line(&mut buff) {
18 |     if bytes == 0 {
19 |       break;
20 |     }
21 |     out.push(buff.clone());
22 |     buff.clear();
23 |   }
24 | 
25 |   out
26 | }
27 | 
/// Borrows every string as a whitespace-trimmed slice, dropping those that
/// are empty after trimming.
///
/// Takes a slice rather than `&Vec<String>` so any contiguous run of strings
/// can be passed; a `&Vec<String>` argument still coerces.
fn to_slices<'src>(strings: &'src [String]) -> Vec<&'src str> {
  strings
    .iter()
    .map(|s| s.trim())
    .filter(|s| !s.is_empty())
    .collect()
}
35 | 
36 | fn run() -> i32 {
37 |   let options = opts::cmd_parse();
38 | 
39 |   if options.benchmark > 0 && options.query.is_empty() {
40 |     println!("To benchmark, provide a query with one of the -q/-e/--query/--show-matches flags");
41 |     return 1;
42 |   }
43 | 
44 |   let candidates = candidates_from_stdin();
45 |   let candidates = to_slices(&candidates);
46 | 
47 |   if options.benchmark > 0 {
48 |     // Run a benchmarking run without output
49 |     for _ in 0..options.benchmark {
50 |       search_score(&options.query, &candidates, options.parallelism);
51 |     }
52 |     0
53 |   } else if !options.query.is_empty() {
54 |     // Run printing to stdout
55 |     let results = search_score(&options.query, &candidates, options.parallelism);
56 |     for result in results.iter().take(options.lines) {
57 |       if options.show_scores {
58 |         if result.score == SCORE_MIN {
59 |           print!("(     ) ");
60 |         } else {
61 |           print!("({:5.2}) ", result.score);
62 |         }
63 |         println!("{}", candidates[result.candidate_index]);
64 |       }
65 |     }
66 |     0
67 |   } else {
68 |     // Run interactively
69 |     interactive::run(&candidates, &options)
70 |   }
71 | }
72 | 
73 | fn main() {
74 |   process::exit(run());
75 | }
76 | 


--------------------------------------------------------------------------------
/src/bin/mod.rs:
--------------------------------------------------------------------------------
1 | pub mod interactive;
2 | pub mod opts;
3 | 


--------------------------------------------------------------------------------
/src/bin/opts.rs:
--------------------------------------------------------------------------------
  1 | extern crate clap;
  2 | 
  3 | use self::clap::{App, Arg};
  4 | 
  5 | pub const NAME: &'static str = env!("CARGO_PKG_NAME");
  6 | pub const VERSION: &'static str = env!("CARGO_PKG_VERSION");
  7 | pub const WEBSITE: &'static str = env!("CARGO_PKG_HOMEPAGE");
  8 | pub const DESCRIPTION: &'static str = env!("CARGO_PKG_DESCRIPTION");
  9 | 
 10 | pub const DEFLT_STRING_BUFFER_LEN: usize = 128;
 11 | 
/// Settings gathered from the command line
#[derive(Debug)]
pub struct Options {
  /// Query to search for; empty when none was given
  pub query: String,
  /// Number of output lines to display
  pub lines: usize,
  /// Whether to show the numerical score of each match
  pub show_scores: bool,
  /// Maximum number of worker threads to use
  pub parallelism: usize,
  /// Prompt shown when entering queries
  pub prompt: String,
  /// Number of repeated searches to run for benchmarking; `0` disables it
  pub benchmark: usize,
}
 21 | 
 22 | impl Default for Options {
 23 |   fn default() -> Self {
 24 |     Self {
 25 |       query: String::new(),
 26 |       lines: 10,
 27 |       show_scores: false,
 28 |       parallelism: 4,
 29 |       prompt: "> ".to_string(),
 30 |       benchmark: 0,
 31 |     }
 32 |   }
 33 | }
 34 | 
 35 | pub fn cmd_parse() -> Options {
 36 |   let mut out = Options::default();
 37 | 
 38 |   let deflt_query = out.query.to_string();
 39 |   let deflt_lines = out.lines.to_string();
 40 |   let deflt_parallelism = out.parallelism.to_string();
 41 |   let deflt_prompt = out.prompt.to_string();
 42 |   let deflt_benchmark = out.benchmark.to_string();
 43 | 
 44 |   let long_about: String = format!("{}\n[{}]", DESCRIPTION, WEBSITE);
 45 | 
 46 |   let matches = App::new(NAME)
 47 |     .version(VERSION)
 48 |     .about(DESCRIPTION)
 49 |     .long_about(long_about.as_ref())
 50 |     .arg(
 51 |       Arg::with_name("query")
 52 |         .short("q")
 53 |         .long("query")
 54 |         .value_name("QUERY")
 55 |         .default_value(&deflt_query)
 56 |         .help("Query string to search for"),
 57 |     )
 58 |     .arg(
 59 |       Arg::with_name("lines")
 60 |         .short("l")
 61 |         .long("lines")
 62 |         .value_name("LINES")
 63 |         .default_value(&deflt_lines)
 64 |         .help("Number of output lines to display"),
 65 |     )
 66 |     .arg(
 67 |       Arg::with_name("show-scores")
 68 |         .short("s")
 69 |         .long("show-scores")
 70 |         .help("Show numerical scores for each match"),
 71 |     )
 72 |     .arg(
 73 |       Arg::with_name("parallelism")
 74 |         .short("j")
 75 |         .long("parallelism")
 76 |         .value_name("THREADS")
 77 |         .default_value(&deflt_parallelism)
 78 |         .help("Maximum number of worker threads to use"),
 79 |     )
 80 |     .arg(
 81 |       Arg::with_name("prompt")
 82 |         .short("p")
 83 |         .long("prompt")
 84 |         .value_name("PROMPT")
 85 |         .default_value(&deflt_prompt)
 86 |         .help("Propmt to show when entering queries"),
 87 |     )
 88 |     .arg(
 89 |       Arg::with_name("benchmark")
 90 |         .short("b")
 91 |         .long("benchmark")
 92 |         .value_name("REPEATS")
 93 |         .default_value(&deflt_benchmark)
 94 |         .help("Set to a positive value to run that many repeated searches for benchmarking"),
 95 |     )
 96 |     .arg(
 97 |       Arg::with_name("workers")
 98 |         .long("workers")
 99 |         .value_name("THREADS")
100 |         .help("Identical to \"--parallelism\""),
101 |     )
102 |     .arg(
103 |       Arg::with_name("show-matches")
104 |         .short("e")
105 |         .long("show-matches")
106 |         .value_name("QUERY")
107 |         .help("Identical to \"--query\""),
108 |     )
109 |     .get_matches();
110 | 
111 |   out.query = if matches.is_present("query") {
112 |     matches.value_of("query").unwrap().to_string()
113 |   } else if matches.is_present("show-matches") {
114 |     matches.value_of("show-matches").unwrap().to_string()
115 |   } else {
116 |     out.query
117 |   };
118 |   out.lines = matches
119 |     .value_of("lines")
120 |     .unwrap_or(&deflt_query)
121 |     .parse()
122 |     .unwrap_or(out.lines);
123 |   out.show_scores = matches.is_present("show-scores");
124 |   out.parallelism = {
125 |     if matches.is_present("parallelism") {
126 |       matches.value_of("parallelism").unwrap()
127 |     } else if matches.is_present("workers") {
128 |       matches.value_of("workers").unwrap()
129 |     } else {
130 |       &deflt_parallelism
131 |     }
132 |   }.parse()
133 |     .unwrap_or(out.parallelism);
134 |   out.prompt = matches
135 |     .value_of("prompt")
136 |     .unwrap_or(&out.prompt)
137 |     .to_string();
138 |   out.benchmark = matches
139 |     .value_of("benchmark")
140 |     .unwrap_or(&deflt_benchmark)
141 |     .parse()
142 |     .unwrap_or(out.benchmark);
143 | 
144 |   out
145 | }
146 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | mod score;
2 | mod search;
3 | 
4 | pub use score::{config, has_match, locate, score, LocateResult, Score, ScoreResult};
5 | pub use search::{search_locate, search_score, LocateResults, ScoreResults};
6 | 


--------------------------------------------------------------------------------
/src/score/config.rs:
--------------------------------------------------------------------------------
 1 | extern crate std;
 2 | 
 3 | use std::f64;
 4 | 
 5 | use score::Score;
 6 | 
/// Worst possible score; also stored for positions that cannot end in a match
pub const SCORE_MIN: Score = f64::NEG_INFINITY;
/// Best possible score, given when query and candidate have equal length
pub const SCORE_MAX: Score = f64::INFINITY;

/// Penalty per candidate character in front of the first query character
pub const SCORE_GAP_LEADING: Score = -0.005;
/// Penalty per unmatched candidate character while scoring any query
/// character but the last
pub const SCORE_GAP_INNER: Score = -0.01;
/// Penalty per unmatched candidate character while scoring the last query
/// character
pub const SCORE_GAP_TRAILING: Score = -0.005;

/// Bonus for a match directly following a match of the previous query
/// character
pub const SCORE_MATCH_CONSECUTIVE: Score = 1.0;
/// Bonus for a match directly after a `/` (and for the first character)
pub const SCORE_MATCH_SLASH: Score = 0.9;
/// Bonus for a match directly after a space, `-` or `_`
pub const SCORE_MATCH_WORD: Score = 0.8;
/// Bonus for matching an uppercase character that follows a lowercase one
pub const SCORE_MATCH_CAPITAL: Score = 0.7;
/// Bonus for a match directly after a `.`
pub const SCORE_MATCH_DOT: Score = 0.6;

/// Candidates with more bytes than this are given `SCORE_MIN` without scoring
pub const CANDIDATE_MAX_BYTES: usize = 2048;
/// Candidates with more characters than this are given `SCORE_MIN` without
/// scoring
pub const CANDIDATE_MAX_CHARS: usize = 1024;
22 | 
#[cfg(test)]
mod tests {
  use super::*;

  /// Every bonus (and the maximum score) must be strictly positive
  #[test]
  fn positive_scores() {
    let bonuses = [
      SCORE_MAX,
      SCORE_MATCH_CONSECUTIVE,
      SCORE_MATCH_SLASH,
      SCORE_MATCH_WORD,
      SCORE_MATCH_CAPITAL,
      SCORE_MATCH_DOT,
    ];
    for bonus in bonuses.iter() {
      assert!(*bonus > 0.0);
    }
  }

  /// Every penalty (and the minimum score) must be strictly negative
  #[test]
  fn negative_scores() {
    let penalties = [
      SCORE_MIN,
      SCORE_GAP_LEADING,
      SCORE_GAP_INNER,
      SCORE_GAP_TRAILING,
    ];
    for penalty in penalties.iter() {
      assert!(*penalty < 0.0);
    }
  }

  /// The candidate size limits must allow at least some input
  #[test]
  fn non_zero() {
    for limit in [CANDIDATE_MAX_BYTES, CANDIDATE_MAX_CHARS].iter() {
      assert_ne!(0, *limit);
    }
  }
}
59 | 


--------------------------------------------------------------------------------
/src/score/mod.rs:
--------------------------------------------------------------------------------
  1 | extern crate bit_vec;
  2 | extern crate ndarray;
  3 | 
  4 | pub mod config;
  5 | 
  6 | use std::cmp::Ordering;
  7 | 
  8 | use self::bit_vec::BitVec;
  9 | use self::ndarray::prelude::*;
 10 | 
 11 | use self::config::*;
 12 | 
/// Numerical quality of a match; higher is better
pub type Score = f64;
/// Table of scores indexed by `[query character, candidate character]`
type ScoreMatrix = Array2<Score>;
 15 | 
/// Result of querying the score against a candidate
#[derive(Debug)]
pub struct ScoreResult {
  /// Index identifying the candidate that was scored
  pub candidate_index: usize,
  /// How well the query matched the candidate; higher is better
  pub score: Score,
}
 22 | 
/// Result of querying the score and location against a candidate
#[derive(Debug)]
pub struct LocateResult {
  /// Index identifying the candidate that was scored
  pub candidate_index: usize,
  /// How well the query matched the candidate; higher is better
  pub score: Score,
  /// Binary mask showing where the characters of the query match the
  /// candidate, with one bit per candidate character
  pub match_mask: BitVec,
}
 31 | 
 32 | impl ScoreResult {
 33 |   pub fn new(candidate_index: usize) -> Self {
 34 |     Self::with_score(candidate_index, SCORE_MIN)
 35 |   }
 36 | 
 37 |   pub fn with_score(candidate_index: usize, score: Score) -> Self {
 38 |     Self {
 39 |       candidate_index,
 40 |       score,
 41 |     }
 42 |   }
 43 | }
 44 | 
 45 | impl PartialOrd for ScoreResult {
 46 |   fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
 47 |     Some(
 48 |       self
 49 |         .score
 50 |         .partial_cmp(&other.score)
 51 |         .unwrap_or(Ordering::Less)
 52 |         .reverse(),
 53 |     )
 54 |   }
 55 | }
 56 | 
 57 | impl PartialEq for ScoreResult {
 58 |   fn eq(&self, other: &Self) -> bool {
 59 |     self.score == other.score
 60 |   }
 61 | }
 62 | 
 63 | impl LocateResult {
 64 |   pub fn new(candidate_index: usize, candidate_size: usize) -> Self {
 65 |     Self::with_score(candidate_index, candidate_size, SCORE_MIN)
 66 |   }
 67 | 
 68 |   pub fn with_score(candidate_index: usize, candidate_size: usize, score: Score) -> Self {
 69 |     Self {
 70 |       candidate_index,
 71 |       score: score,
 72 |       match_mask: BitVec::from_elem(candidate_size, false),
 73 |     }
 74 |   }
 75 | }
 76 | 
 77 | impl PartialOrd for LocateResult {
 78 |   fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
 79 |     Some(
 80 |       self
 81 |         .score
 82 |         .partial_cmp(&other.score)
 83 |         .unwrap_or(Ordering::Less)
 84 |         .reverse(),
 85 |     )
 86 |   }
 87 | }
 88 | 
 89 | impl PartialEq for LocateResult {
 90 |   fn eq(&self, other: &Self) -> bool {
 91 |     self.score == other.score
 92 |   }
 93 | }
 94 | 
 95 | /// Returns `true` if and only if `candidate` is a match for `query`
 96 | ///
 97 | /// A "match" must contain all of the letters of `query` in order, but not
 98 | /// necessarily continguously.
 99 | pub fn has_match(query: &str, candidate: &str) -> bool {
100 |   let mut cand_iter = candidate.chars();
101 |   // Note: `cand_iter` will be advanced during `all`, which is short-circuiting
102 |   query
103 |     .chars()
104 |     .all(|c| cand_iter.any(|c2| c2.to_lowercase().eq(c.to_lowercase())))
105 | }
106 | 
/// Calculates a score for how well a `query` matches a `candidate`
///
/// Higher scores are better. The returned `candidate_index` is always `0`.
pub fn score(query: &str, candidate: &str) -> ScoreResult {
  score_inner(query, candidate, 0)
}
113 | 
114 | pub(crate) fn score_inner(query: &str, candidate: &str, index: usize) -> ScoreResult {
115 |   let (q_len, c_len) = match get_lengths(query, candidate) {
116 |     LengthsOrScore::Score(s) => return ScoreResult::with_score(index, s),
117 |     LengthsOrScore::Lengths(q, c) => (q, c),
118 |   };
119 | 
120 |   let (best_score_overall, _) = score_internal(query, candidate, q_len, c_len);
121 |   ScoreResult::with_score(index, best_score_overall[[q_len - 1, c_len - 1]])
122 | }
123 | 
/// Calculates a score for how well a `query` matches a `candidate` and gives
/// the locations of the `query` characters in the `candidate` too
///
/// Higher scores are better. The returned `candidate_index` is always `0`.
pub fn locate(query: &str, candidate: &str) -> LocateResult {
  locate_inner(query, candidate, 0)
}
131 | 
132 | pub(crate) fn locate_inner(query: &str, candidate: &str, index: usize) -> LocateResult {
133 |   let candidate_chars = candidate.chars().count();
134 |   let (q_len, c_len) = match get_lengths(query, candidate) {
135 |     LengthsOrScore::Score(s) => {
136 |       let mut out = LocateResult::with_score(index, candidate_chars, s);
137 |       if s == SCORE_MAX {
138 |         // This was an exact match
139 |         out.match_mask.set_all();
140 |       }
141 |       return out;
142 |     }
143 |     LengthsOrScore::Lengths(q, c) => (q, c),
144 |   };
145 | 
146 |   let (best_score_overall, best_score_w_ending) = score_internal(query, candidate, q_len, c_len);
147 |   let mut out = LocateResult::with_score(index, candidate_chars, best_score_overall[[q_len - 1, c_len - 1]]);
148 | 
149 |   let mut query_iter = query.chars();
150 |   let mut cand_iter = candidate.chars();
151 |   // Safe because we'll return at the beginning for zero or unit length
152 |   let mut i = q_len;
153 |   let mut j = c_len;
154 |   while query_iter.next_back() != None {
155 |     i = i.wrapping_sub(1);
156 |     while cand_iter.next_back() != None {
157 |       j = j.wrapping_sub(1);
158 |       if best_score_w_ending[[i, j]] != SCORE_MIN
159 |         && best_score_w_ending[[i, j]] == best_score_overall[[i, j]]
160 |       {
161 |         // There's a match here that was on an optimal path
162 |         out.match_mask.set(j, true);
163 |         break; // Go to the next query letter
164 |       }
165 |     }
166 |   }
167 | 
168 |   out
169 | }
170 | 
/// Outcome of the pre-scoring checks: either the character counts needed to
/// run the scoring, or a score that is already decided
enum LengthsOrScore {
  /// Character counts of the query and the candidate, in that order
  Lengths(usize, usize),
  /// Score decided without running the scoring (`self::Score` is the type
  /// alias, not this variant)
  Score(self::Score),
}
175 | 
176 | fn get_lengths(query: &str, candidate: &str) -> LengthsOrScore {
177 |   if candidate.len() > CANDIDATE_MAX_BYTES || query.len() == 0 {
178 |     // Candidate too long or query too short
179 |     return LengthsOrScore::Score(SCORE_MIN);
180 |   }
181 | 
182 |   let q_len = query.chars().count();
183 |   let c_len = candidate.chars().count();
184 | 
185 |   if q_len == c_len {
186 |     // This is only called when there _is_ a match (candidate contains all
187 |     // chars of query in the right order, so equal lengths mean equal
188 |     // strings
189 |     return LengthsOrScore::Score(SCORE_MAX);
190 |   }
191 | 
192 |   if c_len > CANDIDATE_MAX_CHARS {
193 |     // Too many characters
194 |     return LengthsOrScore::Score(SCORE_MIN);
195 |   }
196 | 
197 |   LengthsOrScore::Lengths(q_len, c_len)
198 | }
199 | 
/// Fills the two score matrices for `query` against `candidate`.
///
/// `q_len` and `c_len` are the character counts of `query` and `candidate`
/// and give the matrix dimensions. Returns `(best_score_overall,
/// best_score_w_ending)`: cell `[i, j]` of the first holds the best score for
/// the first `i + 1` query characters within the first `j + 1` candidate
/// characters; the second holds the same, restricted to query character `i`
/// being matched exactly at candidate character `j` (`SCORE_MIN` where the
/// characters differ).
fn score_internal(
  query: &str,
  candidate: &str,
  q_len: usize,
  c_len: usize,
) -> (ScoreMatrix, ScoreMatrix) {
  let match_bonuses = candidate_match_bonuses(candidate);

  // Matrix of the best score for each position ending in a match
  let mut best_score_w_ending = ScoreMatrix::zeros((q_len, c_len));
  // Matrix for the best score for each position.
  let mut best_score_overall = ScoreMatrix::zeros((q_len, c_len));

  for (i, q_char) in query.chars().enumerate() {
    // Best score so far in this row; carried from column to column
    let mut prev_score = SCORE_MIN;
    // Unmatched candidate characters cost less once the last query
    // character is being placed
    let gap_score = if i == q_len - 1 {
      SCORE_GAP_TRAILING
    } else {
      SCORE_GAP_INNER
    };

    for (j, c_char) in candidate.chars().enumerate() {
      // Characters are compared by their lowercase forms
      if q_char.to_lowercase().eq(c_char.to_lowercase()) {
        // Get the score bonus for matching this char
        let score = if i == 0 {
          // Beginning of the query, penalty for leading gap
          (j as f64 * SCORE_GAP_LEADING) + match_bonuses[j]
        } else if j != 0 {
          // Middle of both query and candidate
          // Either give it the match bonus, or use the consecutive
          // match (which will always be higher, but doesn't stack
          // with match bonus)
          (best_score_overall[[i - 1, j - 1]] + match_bonuses[j])
            .max(best_score_w_ending[[i - 1, j - 1]] + SCORE_MATCH_CONSECUTIVE)
        } else {
          // A later query character cannot sit on the first candidate
          // character
          SCORE_MIN
        };

        // Either match here, or keep the best earlier score and pay for the gap
        prev_score = score.max(prev_score + gap_score);
        best_score_overall[[i, j]] = prev_score;
        best_score_w_ending[[i, j]] = score;
      } else {
        // Give the score penalty for the gap
        prev_score = prev_score + gap_score;
        best_score_overall[[i, j]] = prev_score;
        // We don't end in a match
        best_score_w_ending[[i, j]] = SCORE_MIN;
      }
    }
  }

  (best_score_overall, best_score_w_ending)
}
253 | 
254 | fn candidate_match_bonuses(candidate: &str) -> Vec<Score> {
255 |   let mut prev_char = '/';
256 |   candidate
257 |     .chars()
258 |     .map(|current| {
259 |       let s = character_match_bonus(current, prev_char);
260 |       prev_char = current;
261 |       s
262 |     })
263 |     .collect()
264 | }
265 | 
266 | fn character_match_bonus(current: char, previous: char) -> Score {
267 |   if current.is_uppercase() && previous.is_lowercase() {
268 |     SCORE_MATCH_CAPITAL
269 |   } else {
270 |     match previous {
271 |       '/' => SCORE_MATCH_SLASH,
272 |       '.' => SCORE_MATCH_DOT,
273 |       _ if is_separator(previous) => SCORE_MATCH_WORD,
274 |       _ => 0.0,
275 |     }
276 |   }
277 | }
278 | 
/// Returns `true` for the characters that separate words: space, `-` and `_`
fn is_separator(character: char) -> bool {
  const SEPARATORS: [char; 3] = [' ', '-', '_'];
  SEPARATORS.contains(&character)
}
287 | 
#[cfg(test)]
mod tests {
  use super::*;

  /// A query identical to the candidate matches, including punctuation-heavy
  /// and non-ASCII text.
  #[test]
  fn exact_match() {
    assert!(has_match("query", "query"));
    assert!(has_match(
      "156aufsdn926f9=sdk/~']",
      "156aufsdn926f9=sdk/~']"
    ));
    assert!(has_match(
      "😨Ɣ·®x¯ÍĞ.ɅƁñîƹ♺àwÑ☆ǈ😞´ƙºÑ♫",
      "😨Ɣ·®x¯ÍĞ.ɅƁñîƹ♺àwÑ☆ǈ😞´ƙºÑ♫"
    ));
  }

  /// A query whose characters appear in order within the candidate matches,
  /// even when other characters sit in between.
  #[test]
  fn partial_match() {
    assert!(has_match("ca", "candidate"));
    assert!(has_match("cat", "candidate"));
    assert!(has_match("ndt", "candidate"));
    assert!(has_match("nate", "candidate"));
    assert!(has_match("56aufn92=sd/~']", "156aufsdn926f9=sdk/~']"));
    assert!(has_match(
      "😨Ɣ·®x¯ÍĞɅƁƹ♺à☆ǈ´ƙÑ♫",
      "😨Ɣ·®x¯ÍĞ.ɅƁñîƹ♺àwÑ☆ǈ😞´ƙºÑ♫"
    ));
  }

  /// Matching ignores letter case, for ASCII and Cyrillic text alike.
  #[test]
  fn case_match() {
    assert!(has_match("QUERY", "query"));
    assert!(has_match("query", "QUERY"));
    assert!(has_match("QuEry", "query"));
    assert!(has_match(
      "прописная буква",
      "ПРОПИСНАЯ БУКВА"
    ));
  }

  /// The empty query matches every candidate, including the empty one.
  #[test]
  fn empty_match() {
    assert!(has_match("", ""));
    assert!(has_match("", "candidate"));
    assert!(has_match(
      "",
      "😨Ɣ·®x¯ÍĞ.ɅƁñîƹ♺àwÑ☆ǈ😞´ƙºÑ♫"
    ));
    assert!(has_match("", "прописная БУКВА"));
    assert!(has_match("", "a"));
    assert!(has_match("", "4561"));
  }

  /// Out-of-order characters, characters missing from the candidate, and
  /// non-empty queries against an empty candidate do not match.
  #[test]
  fn bad_match() {
    assert!(!has_match("acb", "abc"));
    assert!(!has_match("a", ""));
    assert!(!has_match("abc", "def"));
    assert!(!has_match("😨Ɣ·®x¯ÍĞ.Ʌ", "5ù¨ȼ♕☩♘⚁^"));
    assert!(!has_match(
      "прописная БУКВА",
      "прописнаяБУКВА"
    ));
    assert!(!has_match(
      "БУКВА прописная",
      "прописная БУКВА"
    ));
  }

  /// Candidates where the query hits the start of a word (after a slash, a
  /// separator, or at a capital) outscore ones where it does not.
  #[test]
  fn score_pref_word_start() {
    assert!(score("amor", "app/models/order").score > score("amor", "app/models/zrder").score);
    assert!(score("amor", "app models-order").score > score("amor", "app models zrder").score);
    assert!(score("qart", "QuArTz").score > score("qart", "QuaRTz").score);
  }

  /// A run of consecutive matched letters outscores the same letters spread
  /// across path components.
  #[test]
  fn score_pref_consecutive_letters() {
    assert!(score("amo", "app/m/foo").score < score("amo", "app/models/foo").score);
  }

  /// With the same contiguous match, the candidate with less trailing text
  /// scores higher.
  #[test]
  fn score_pref_contiguous_vs_word() {
    assert!(score("gemfil", "Gemfile.lock").score < score("gemfil", "Gemfile").score);
  }

  /// Ordering expectations between candidates that differ in gap placement
  /// or overall length.
  #[test]
  fn score_pref_shorter() {
    assert!(score("abce", "abcdef").score > score("abce", "abc de").score);
    assert!(score("abc", "    a b c ").score > score("abc", " a  b  c ").score);
    assert!(score("abc", " a b c    ").score > score("abc", " a  b  c ").score);
    assert!(score("test", "tests").score > score("test", "testing").score);
  }

  /// A match at the very start of the candidate beats the same match
  /// preceded by a slash.
  #[test]
  fn score_prefer_start() {
    assert!(score("test", "testing").score > score("test", "/testing").score);
  }

  /// A query equal to the candidate scores exactly `SCORE_MAX`.
  #[test]
  fn score_exact() {
    assert_eq!(SCORE_MAX, score("query", "query").score);
    assert_eq!(
      SCORE_MAX,
      score("156aufsdn926f9=sdk/~']", "156aufsdn926f9=sdk/~']").score
    );
    assert_eq!(
      SCORE_MAX,
      score(
        "😨Ɣ·®x¯ÍĞ.ɅƁñîƹ♺àwÑ☆ǈ😞´ƙºÑ♫",
        "😨Ɣ·®x¯ÍĞ.ɅƁñîƹ♺àwÑ☆ǈ😞´ƙºÑ♫"
      ).score
    );
  }

  /// The empty query scores exactly `SCORE_MIN` against any candidate.
  #[test]
  fn score_empty() {
    assert_eq!(SCORE_MIN, score("", "").score);
    assert_eq!(SCORE_MIN, score("", "candidate").score);
    assert_eq!(
      SCORE_MIN,
      score(
        "",
        "😨Ɣ·®x¯ÍĞ.ɅƁñîƹ♺àwÑ☆ǈ😞´ƙºÑ♫"
      ).score
    );
    assert_eq!(SCORE_MIN, score("", "прописная БУКВА").score);
    assert_eq!(SCORE_MIN, score("", "a").score);
    assert_eq!(SCORE_MIN, score("", "4561").score);
  }

  /// Pins the exact penalties for unmatched characters before the first
  /// match (leading), between matches (inner) and after the last match
  /// (trailing).
  #[test]
  fn score_gaps() {
    assert_eq!(SCORE_GAP_LEADING, score("a", "*a").score);
    assert_eq!(SCORE_GAP_LEADING * 2.0, score("a", "*ba").score);
    assert_eq!(
      SCORE_GAP_LEADING * 2.0 + SCORE_GAP_TRAILING,
      score("a", "**a*").score
    );
    assert_eq!(
      SCORE_GAP_LEADING * 2.0 + SCORE_GAP_TRAILING * 2.0,
      score("a", "**a**").score
    );
    assert_eq!(
      SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_CONSECUTIVE + SCORE_GAP_TRAILING * 2.0,
      score("aa", "**aa♺*").score
    );
    assert_eq!(
      SCORE_GAP_LEADING * 2.0 + SCORE_GAP_INNER + SCORE_MATCH_WORD + SCORE_GAP_TRAILING * 2.0,
      score("ab", "**a-b♺*").score
    );
    assert_eq!(
      SCORE_GAP_LEADING
        + SCORE_GAP_LEADING
        + SCORE_GAP_INNER
        + SCORE_GAP_TRAILING
        + SCORE_GAP_TRAILING,
      score("aa", "**a♺a**").score
    );
  }

  /// Pins the exact bonus for matches that directly follow another match.
  #[test]
  fn score_consecutive() {
    assert_eq!(
      SCORE_GAP_LEADING + SCORE_MATCH_CONSECUTIVE,
      score("aa", "*aa").score
    );
    assert_eq!(
      SCORE_GAP_LEADING + SCORE_MATCH_CONSECUTIVE * 2.0,
      score("aaa", "♫aaa").score
    );
    assert_eq!(
      SCORE_GAP_LEADING + SCORE_GAP_INNER + SCORE_MATCH_CONSECUTIVE,
      score("aaa", "*a*aa").score
    );
  }

  /// Pins the exact bonus for a match directly after a `'/'`.
  #[test]
  fn score_slash() {
    assert_eq!(
      SCORE_GAP_LEADING + SCORE_MATCH_SLASH,
      score("a", "/a").score
    );
    assert_eq!(
      SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_SLASH,
      score("a", "*/a").score
    );
    assert_eq!(
      SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_SLASH + SCORE_MATCH_CONSECUTIVE,
      score("aa", "a/aa").score
    );
  }

  /// Pins the exact bonus for an uppercase match directly after a lowercase
  /// character.
  #[test]
  fn score_capital() {
    assert_eq!(
      SCORE_GAP_LEADING + SCORE_MATCH_CAPITAL,
      score("a", "bA").score
    );
    assert_eq!(
      SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_CAPITAL,
      score("a", "baA").score
    );
    assert_eq!(
      SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_CAPITAL + SCORE_MATCH_CONSECUTIVE,
      score("aa", "😞aAa").score
    );
  }

  /// Pins the exact bonus for a match directly after a `'.'`.
  #[test]
  fn score_dot() {
    assert_eq!(SCORE_GAP_LEADING + SCORE_MATCH_DOT, score("a", ".a").score);
    assert_eq!(
      SCORE_GAP_LEADING * 3.0 + SCORE_MATCH_DOT,
      score("a", "*a.a").score
    );
    assert_eq!(
      SCORE_GAP_LEADING + SCORE_GAP_INNER + SCORE_MATCH_DOT,
      score("a", "♫a.a").score
    );
  }

  /// Asserts that `locate(query, candidate)` reports exactly `score`.
  ///
  /// Only the score is checked; the reported match positions are not.
  fn assert_locate_score(query: &str, candidate: &str, score: Score) {
    let result = locate(query, candidate);

    assert_eq!(score, result.score);
  }

  /// `locate` counterpart of `score_exact`.
  #[test]
  fn locate_exact() {
    assert_locate_score("query", "query", SCORE_MAX);
    assert_locate_score(
      "156aufsdn926f9=sdk/~']",
      "156aufsdn926f9=sdk/~']",
      SCORE_MAX,
    );
    assert_locate_score(
      "😨Ɣ·®x¯ÍĞ.ɅƁñîƹ♺àwÑ☆ǈ😞´ƙºÑ♫",
      "😨Ɣ·®x¯ÍĞ.ɅƁñîƹ♺àwÑ☆ǈ😞´ƙºÑ♫",
      SCORE_MAX,
    );
  }

  /// `locate` counterpart of `score_empty`.
  #[test]
  fn locate_empty() {
    assert_locate_score("", "", SCORE_MIN);
    assert_locate_score("", "candidate", SCORE_MIN);
    assert_locate_score(
      "",
      "😨Ɣ·®x¯ÍĞ.ɅƁñîƹ♺àwÑ☆ǈ😞´ƙºÑ♫, ",
      SCORE_MIN,
    );
    assert_locate_score("", "прописная БУКВА", SCORE_MIN);
    assert_locate_score("", "a", SCORE_MIN);
    assert_locate_score("", "4561", SCORE_MIN);
  }

  /// `locate` counterpart of `score_gaps`.
  #[test]
  fn locate_gaps() {
    assert_locate_score("a", "*a", SCORE_GAP_LEADING);
    assert_locate_score("a", "*ba", SCORE_GAP_LEADING * 2.0);
    assert_locate_score(
      "a",
      "**a*",
      SCORE_GAP_LEADING * 2.0 + SCORE_GAP_TRAILING,
    );
    assert_locate_score(
      "a",
      "**a**",
      SCORE_GAP_LEADING * 2.0 + SCORE_GAP_TRAILING * 2.0,
    );
    assert_locate_score(
      "aa",
      "**aa♺*",
      SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_CONSECUTIVE + SCORE_GAP_TRAILING * 2.0,
    );
    assert_locate_score(
      "ab",
      "**a-b♺*",
      SCORE_GAP_LEADING * 2.0 + SCORE_GAP_INNER + SCORE_MATCH_WORD + SCORE_GAP_TRAILING * 2.0,
    );
    assert_locate_score(
      "aa",
      "**a♺a**",
      SCORE_GAP_LEADING
        + SCORE_GAP_LEADING
        + SCORE_GAP_INNER
        + SCORE_GAP_TRAILING
        + SCORE_GAP_TRAILING,
    );
  }

  /// `locate` counterpart of `score_consecutive`.
  #[test]
  fn locate_consecutive() {
    assert_locate_score(
      "aa",
      "*aa",
      SCORE_GAP_LEADING + SCORE_MATCH_CONSECUTIVE,
    );
    assert_locate_score(
      "aaa",
      "♫aaa",
      SCORE_GAP_LEADING + SCORE_MATCH_CONSECUTIVE * 2.0,
    );
    assert_locate_score(
      "aaa",
      "*a*aa",
      SCORE_GAP_LEADING + SCORE_GAP_INNER + SCORE_MATCH_CONSECUTIVE,
    );
  }

  /// `locate` counterpart of `score_slash`.
  #[test]
  fn locate_slash() {
    assert_locate_score(
      "a",
      "/a",
      SCORE_GAP_LEADING + SCORE_MATCH_SLASH,
    );
    assert_locate_score(
      "a",
      "*/a",
      SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_SLASH,
    );
    assert_locate_score(
      "aa",
      "a/aa",
      SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_SLASH + SCORE_MATCH_CONSECUTIVE,
    );
  }

  /// `locate` counterpart of `score_capital`.
  #[test]
  fn locate_capital() {
    assert_locate_score(
      "a",
      "bA",
      SCORE_GAP_LEADING + SCORE_MATCH_CAPITAL,
    );
    assert_locate_score(
      "a",
      "baA",
      SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_CAPITAL,
    );
    assert_locate_score(
      "aa",
      "😞aAa",
      SCORE_GAP_LEADING * 2.0 + SCORE_MATCH_CAPITAL + SCORE_MATCH_CONSECUTIVE,
    );
  }

  /// `locate` counterpart of `score_dot`.
  #[test]
  fn locate_dot() {
    assert_locate_score("a", ".a", SCORE_GAP_LEADING + SCORE_MATCH_DOT);
    assert_locate_score(
      "a",
      "*a.a",
      SCORE_GAP_LEADING * 3.0 + SCORE_MATCH_DOT,
    );
    assert_locate_score(
      "a",
      "♫a.a",
      SCORE_GAP_LEADING + SCORE_GAP_INNER + SCORE_MATCH_DOT,
    );
  }
}
621 | 


--------------------------------------------------------------------------------
/src/search/mod.rs:
--------------------------------------------------------------------------------
  1 | extern crate crossbeam;
  2 | extern crate itertools;
  3 | 
  4 | use std::cmp::Ordering;
  5 | use std::usize;
  6 | 
  7 | use self::crossbeam::channel;
  8 | use self::crossbeam::scope as thread_scope;
  9 | use self::itertools::kmerge;
 10 | 
 11 | use score::{has_match, locate_inner, score_inner, LocateResult, ScoreResult};
 12 | 
/// Collection of scores and the candidates they apply to, as returned by
/// `search_score`
pub type ScoreResults = Vec<ScoreResult>;
/// Collection of scores, locations, and the candidates they apply to, as
/// returned by `search_locate`
pub type LocateResults = Vec<LocateResult>;
 17 | 
 18 | /// Search among a collection of candidates using the given query, returning
 19 | /// an ordered collection of results (highest score first)
 20 | pub fn search_score(
 21 |   query: &str,
 22 |   candidates: &[&str],
 23 |   parallelism: usize,
 24 | ) -> ScoreResults {
 25 |   search_internal(query, candidates, parallelism, score_inner).collect()
 26 | }
 27 | 
 28 | /// Search among a collection of candidates using the given query, returning
 29 | /// an ordered collection of results (highest score first) with the locations
 30 | /// of the query in each candidate
 31 | pub fn search_locate(
 32 |   query: &str,
 33 |   candidates: &[&str],
 34 |   parallelism: usize,
 35 | ) -> LocateResults {
 36 |   search_internal(query, candidates, parallelism, locate_inner).collect()
 37 | }
 38 | 
 39 | fn search_internal<T>(
 40 |   query: &str,
 41 |   candidates: &[&str],
 42 |   parallelism: usize,
 43 |   search_fn: fn(&str, &str, usize) -> T,
 44 | ) -> Box<dyn Iterator<Item = T>>
 45 | where
 46 |   T: PartialOrd + Sized + Send + 'static,
 47 | {
 48 |   let parallelism = calculate_parallelism(candidates.len(), parallelism, query.is_empty());
 49 |   let mut candidates = candidates;
 50 |   let (sender, receiver) = channel::bounded::<Vec<T>>(parallelism);
 51 | 
 52 |   if parallelism < 2 {
 53 |     Box::new(search_worker(candidates, query, 0, search_fn).into_iter())
 54 |   } else {
 55 |     thread_scope(|scope| {
 56 |       let mut remaining_candidates = candidates.len();
 57 |       let per_thread_count = ceil_div(remaining_candidates, parallelism);
 58 |       let mut thread_offset = 0;
 59 | 
 60 |       // Create "parallelism" threads
 61 |       while remaining_candidates > 0 {
 62 |         // Search in this thread's share
 63 |         let split = if remaining_candidates >= per_thread_count {
 64 |           remaining_candidates -= per_thread_count;
 65 |           per_thread_count
 66 |         } else {
 67 |           remaining_candidates = 0;
 68 |           remaining_candidates
 69 |         };
 70 |         let split = candidates.split_at(split);
 71 |         let splitted_len = split.0.len();
 72 |         let sender = sender.clone();
 73 |         scope.spawn(move || {
 74 |           sender.send(search_worker(split.0, query, thread_offset, search_fn));
 75 |         });
 76 |         thread_offset += splitted_len;
 77 | 
 78 |         // Remove that share from the candidate slice
 79 |         candidates = split.1;
 80 |       }
 81 | 
 82 |       drop(sender);
 83 |     });
 84 | 
 85 |     Box::new(kmerge(receiver))
 86 |   }
 87 | }
 88 | 
 89 | // Search among candidates against a query in a single thread
 90 | fn search_worker<T>(
 91 |   candidates: &[&str],
 92 |   query: &str,
 93 |   offset_index: usize,
 94 |   search_fn: fn(&str, &str, usize) -> T
 95 | ) -> Vec<T>
 96 | where
 97 |   T: PartialOrd,
 98 | {
 99 |   let mut out = Vec::with_capacity(candidates.len());
100 |   for (index, candidate) in candidates.into_iter().enumerate() {
101 |     if has_match(&query, candidate) {
102 |       out.push(search_fn(&query, candidate, offset_index + index));
103 |     }
104 |   }
105 |   out.sort_unstable_by(|result1, result2| result1.partial_cmp(result2).unwrap_or(Ordering::Less));
106 | 
107 |   out
108 | }
109 | 
110 | fn calculate_parallelism(
111 |   candidate_count: usize,
112 |   configured_parallelism: usize,
113 |   empty_query: bool,
114 | ) -> usize {
115 |   if empty_query {
116 |     // No need to do much for no query
117 |     return 1;
118 |   }
119 | 
120 |   // Use a ramp up to avoid unecessarily starting threads with few candidates
121 |   let ramped_parallelism = match candidate_count {
122 |     n if n < 17 => ceil_div(n, 4),
123 |     n if n > 32 => ceil_div(n, 8),
124 |     _ => 4,
125 |   };
126 | 
127 |   configured_parallelism
128 |     .min(ramped_parallelism)
129 |     .min(candidate_count)
130 |     .max(1)
131 | }
132 | 
/// Integer ceiling division: the smallest integer not less than `a / b`.
///
/// Computed from the quotient and remainder rather than as `(a + b - 1) / b`,
/// so it cannot overflow for any `a`.
///
/// # Panics
///
/// Panics if `b` is zero.
fn ceil_div(a: usize, b: usize) -> usize {
  let quotient = a / b;
  if a % b == 0 {
    quotient
  } else {
    // A non-zero remainder means `b >= 2`, so `quotient + 1` cannot overflow
    quotient + 1
  }
}
137 | 
#[cfg(test)]
mod tests {
  use super::*;

  // Checks the thread-count ramp: one thread for up to four candidates, two
  // for 5-8, three for 9-12, four for 13-32, never more than the configured
  // limit, and always exactly one for an empty query.
  #[test]
  fn parallelism_ramp() {
    assert_eq!(1, calculate_parallelism(0, 0, false));
    assert_eq!(1, calculate_parallelism(1, 0, false));
    assert_eq!(1, calculate_parallelism(0, 1, false));
    assert_eq!(1, calculate_parallelism(1, 1, false));

    assert_eq!(1, calculate_parallelism(2, usize::MAX, false));
    assert_eq!(1, calculate_parallelism(3, 4, false));
    assert_eq!(1, calculate_parallelism(4, 2, false));

    for n in 5..9 {
      assert_eq!(2, calculate_parallelism(n, usize::MAX, false));
      assert_eq!(1, calculate_parallelism(n, usize::MAX, true));
    }

    for n in 9..13 {
      assert_eq!(3, calculate_parallelism(n, usize::MAX, false));
      assert_eq!(1, calculate_parallelism(n, usize::MAX, true));
    }

    for n in 13..33 {
      assert_eq!(4, calculate_parallelism(n, usize::MAX, false));
      assert_eq!(1, calculate_parallelism(n, usize::MAX, true));
    }

    for n in 1..10_000 {
      assert!(calculate_parallelism(n, 12, false) <= 12);
      assert_eq!(1, calculate_parallelism(n, 12, true));
    }
  }

  // Searching an empty candidate list yields no results, whether or not the
  // query is empty.
  fn search_empty_with_parallelism(parallelism: usize) {
    let rs = search_score("", &[], parallelism);
    assert_eq!(0, rs.len());

    let rs = search_score("test", &[], parallelism);
    assert_eq!(0, rs.len());
  }

  // Small searches over one or two candidates: checks how many results come
  // back and which candidate index each one refers to.
  fn search_with_parallelism(parallelism: usize) {
    search_empty_with_parallelism(parallelism);

    let rs = search_score("", &["tags"], parallelism);
    assert_eq!(1, rs.len());
    assert_eq!(0, rs[0].candidate_index);

    let rs = search_score("♺", &["ñîƹ♺à"], parallelism);
    assert_eq!(1, rs.len());
    assert_eq!(0, rs[0].candidate_index);

    let cs = &["tags", "test"];

    // The empty query returns every candidate
    let rs = search_score("", cs, parallelism);
    assert_eq!(2, rs.len());

    let rs = search_score("te", cs, parallelism);
    assert_eq!(1, rs.len());
    assert_eq!(1, rs[0].candidate_index);

    let rs = search_score("foobar", cs, parallelism);
    assert_eq!(0, rs.len());

    // Both candidates match "ts"; "test" (index 1) is expected first
    let rs = search_score("ts", cs, parallelism);
    assert_eq!(2, rs.len());
    assert_eq!(
      vec![1, 0],
      rs.iter().map(|r| r.candidate_index).collect::<Vec<_>>()
    );
  }

  // Searches over the twenty number words "one" to "twenty".
  fn search_med_parallelism(parallelism: usize) {
    let cs = &[
      "one",
      "two",
      "three",
      "four",
      "five",
      "six",
      "seven",
      "eight",
      "nine",
      "ten",
      "eleven",
      "twelve",
      "thirteen",
      "fourteen",
      "fifteen",
      "sixteen",
      "seventeen",
      "eighteen",
      "nineteen",
      "twenty",
    ];

    let rs = search_score("", cs, parallelism);
    assert_eq!(cs.len(), rs.len());

    // Every result for "teen" must be a candidate ending in "teen" (the last
    // four characters, read backwards, spell "neet")
    let rs = search_score("teen", cs, parallelism);
    assert_eq!(7, rs.len());
    for r in rs {
      assert_eq!(
        "neet",
        cs[r.candidate_index].chars().rev().take(4).collect::<String>()
      );
    }

    // "tee" matches more candidates, but the first result must still be one
    // ending in "teen"
    let rs = search_score("tee", cs, parallelism);
    assert_eq!(9, rs.len());
    assert_eq!(
      "neet",
      cs[rs[0].candidate_index].chars().rev().take(4).collect::<String>()
    );

    let rs = search_score("six", cs, parallelism);
    assert_eq!("six", cs[rs[0].candidate_index]);
  }

  // Searches for "12" among the decimal strings "0" to "99999" and checks the
  // number of results and that the first one is the exact string "12".
  fn search_large_parallelism(parallelism: usize) {
    let n = 100_000;
    let mut candidates = Vec::with_capacity(n);
    for i in 0..n {
      candidates.push(format!("{}", i));
    }

    let rs = search_score(
      "12",
      &(candidates.iter().map(|s| &s[..]).collect::<Vec<&str>>()),
      parallelism,
    );

    // This has been precalculated
    // e.g. via `$ seq 0 99999 | grep '.*1.*2.*' | wc -l`
    assert_eq!(8146, rs.len());
    assert_eq!("12", candidates[rs[0].candidate_index]);
  }

  // TODO: test locate

  // Parallelism 0 and 1
  #[test]
  fn search_single() {
    search_with_parallelism(0);
    search_with_parallelism(1);
    search_large_parallelism(1);
  }

  // Parallelism 2
  #[test]
  fn search_double() {
    search_with_parallelism(2);
    search_large_parallelism(2);
  }

  // Parallelism 4
  #[test]
  fn search_quad() {
    search_med_parallelism(4);
    search_large_parallelism(4);
  }

  // Parallelism 5 for the large search.
  // NOTE(review): the medium search is run with 4 here, not 5 — confirm
  // whether that is intentional.
  #[test]
  fn search_quin() {
    search_med_parallelism(4);
    search_large_parallelism(5);
  }

  // Parallelism 16 for the large search.
  // NOTE(review): the medium search is run with 4 here, not 16 — confirm
  // whether that is intentional.
  #[test]
  fn search_large() {
    search_med_parallelism(4);
    search_large_parallelism(16);
  }
}
312 | 


--------------------------------------------------------------------------------