├── .gitattributes ├── .github └── workflows │ └── rust.yml ├── .gitignore ├── CHANGELOG.rst ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── Readme.md ├── SECURITY.md ├── fuzz ├── .gitignore ├── Cargo.toml ├── README.md └── fuzz_targets │ ├── damerau_levenshtein.rs │ ├── indel.rs │ ├── jaro.rs │ ├── jaro_winkler.rs │ ├── lcs_seq.rs │ └── levenshtein.rs ├── rapidfuzz-benches ├── Cargo.toml ├── benches │ ├── bench_damerau_levenshtein.rs │ ├── bench_generic_levenshtein.rs │ ├── bench_indel.rs │ ├── bench_jaro.rs │ ├── bench_jaro_winkler.rs │ ├── bench_lcs_seq.rs │ ├── bench_levenshtein.rs │ └── bench_osa.rs └── results │ ├── damerau_levenshtein.svg │ ├── generic_levenshtein.svg │ ├── indel.svg │ ├── jaro.svg │ ├── jaro_winkler.svg │ ├── levenshtein.svg │ ├── longest_common_subsequence.svg │ └── osa.svg └── src ├── common.rs ├── details.rs ├── details ├── common.rs ├── distance.rs ├── growing_hashmap.rs ├── intrinsics.rs ├── matrix.rs └── pattern_match_vector.rs ├── distance.rs ├── distance ├── damerau_levenshtein.rs ├── example.rs ├── example │ └── ocr.rs ├── hamming.rs ├── indel.rs ├── jaro.rs ├── jaro_winkler.rs ├── lcs_seq.rs ├── levenshtein.rs ├── osa.rs ├── postfix.rs └── prefix.rs ├── fuzz.rs └── lib.rs /.gitattributes: -------------------------------------------------------------------------------- 1 | rapidfuzz-benches/results* linguist-vendored -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | pull_request: 6 | 7 | env: 8 | CARGO_TERM_COLOR: always 9 | 10 | jobs: 11 | build: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Build 18 | run: cargo build --verbose 19 | - name: Run tests 20 | run: cargo test --verbose 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | debug/ 4 | target/ 5 | 6 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 7 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 8 | Cargo.lock 9 | 10 | # These are backup files generated by rustfmt 11 | **/*.rs.bk 12 | 13 | lib 14 | .vscode/ 15 | 16 | /.idea/ -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | --------- 3 | 4 | [0.5.0] - 2023-12-01 5 | ^^^^^^^^^^^^^^^^^^^^ 6 | Changed 7 | ~~~~~~~ 8 | * improve Args builder for hamming 9 | 10 | 11 | [0.4.0] - 2023-12-01 12 | ^^^^^^^^^^^^^^^^^^^^ 13 | Changed 14 | ~~~~~~~ 15 | * rewrite of function signatures to reduce boilerplate 16 | 17 | * return type is now automatically deduced, so no more unwrapping is needed 18 | when ``score_cutoff`` is not used 19 | * optional arguments are now passed in ``Args`` structs using the builder pattern to reduce the amount 20 | of extra arguments 21 | * extra ``*_with_args`` overload for a variant with arguments, while the default version accepts 22 | only two sequences 23 | 24 | The signatures are expected to largely stay this way for the foreseeable future.
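
A minimal sketch of the resulting call style, mirroring the examples in ``Readme.md`` (Levenshtein used for illustration):

```rust
use rapidfuzz::distance::levenshtein;

// Default version: only the two sequences, return type deduced directly
// (no unwrapping needed when no score_cutoff is involved).
assert_eq!(3, levenshtein::distance("kitten".chars(), "sitting".chars()));

// *_with_args overload: optional arguments go through the Args builder;
// with a score_cutoff the result becomes an Option.
assert_eq!(
    None,
    levenshtein::distance_with_args(
        "kitten".chars(),
        "sitting".chars(),
        &levenshtein::Args::default().score_cutoff(2),
    )
);
```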
25 | 26 | [0.3.2] - 2023-11-29 27 | ^^^^^^^^^^^^^^^^^^^^ 28 | Fixed 29 | ~~~~~ 30 | * fixed crash inside hashmap grow function leading to a crash in the 31 | Damerau-Levenshtein implementation 32 | * fixed incorrect flagging of similar characters in Jaro similarity 33 | * fixed wraparound in Longest Common Subsequence 34 | 35 | [0.3.1] - 2023-11-29 36 | ^^^^^^^^^^^^^^^^^^^^ 37 | Fixed 38 | ~~~~~ 39 | * fixed crash inside hashmap lookup function leading to a crash in the 40 | Damerau-Levenshtein implementation 41 | 42 | [0.3.0] - 2023-11-27 43 | ^^^^^^^^^^^^^^^^^^^^ 44 | Previous versions only existed for testing purposed years ago. This is a complete 45 | rewrite porting a subset of the features provided in the C++ implementation of 46 | rapidfuzz. The remaining features will be added in later releases. 47 | 48 | Added 49 | ~~~~~ 50 | * added implementations of the following string metrics: 51 | 52 | * Levenshtein distance 53 | * Damerau-Levenshtein distance 54 | * Hamming distance 55 | * Longest common subsequence 56 | * Indel distance 57 | * Optimal string alignment distance 58 | * Postfix similarity 59 | * Prefix similarity 60 | * Jaro similarity 61 | * Jaro-Winkler similarity 62 | 63 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | description="rapid fuzzy string matching library" 3 | name = "rapidfuzz" 4 | version = "0.5.0" 5 | authors = ["maxbachmann "] 6 | edition = "2021" 7 | readme = "Readme.md" 8 | license = "MIT" 9 | repository = "https://github.com/rapidfuzz/rapidfuzz-rs" 10 | documentation = "https://docs.rs/rapidfuzz/" 11 | keywords = ["string", "similarity", "Hamming", "Levenshtein", "Jaro"] 12 | exclude = [".*"] 13 | categories = ["text-processing"] 14 | 15 | [lib] 16 | name = "rapidfuzz" 17 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any person obtaining 2 | a copy of this software and associated documentation files (the 3 | "Software"), to deal in the Software without restriction, including 4 | without limitation the rights to use, copy, modify, merge, publish, 5 | distribute, sublicense, and/or sell copies of the Software, and to 6 | permit persons to whom the Software is furnished to do so, subject to 7 | the following conditions: 8 | 9 | The above copyright notice and this permission notice shall be 10 | included in all copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 14 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 15 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 16 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 17 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 18 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
19 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 |

2 | RapidFuzz 3 | 4 | Rapid fuzzy string matching in Rust using the Levenshtein Distance 5 | 6 | [badges: Continuous Integration, Gitter chat, Documentation, license] 24 | 25 | 26 | Description • 27 | Installation • 28 | Usage • 29 | License 30 |

31 | 32 | --- 33 | ## Description 34 | 35 | RapidFuzz is a general-purpose string matching library with implementations 36 | for Rust, C++ and Python. 37 | 38 | ### Key Features 39 | 40 | - **Diverse String Metrics**: Offers a variety of string metrics 41 | to suit different use cases. These range from the Levenshtein 42 | distance for edit-based comparisons to the Jaro-Winkler similarity for 43 | more nuanced similarity assessments. 44 | - **Optimized for Speed**: The library is designed with performance in mind. 45 | Each implementation is carefully tuned to ensure optimal performance, 46 | making it suitable for the analysis of large datasets. 47 | - **Easy to use**: The API is designed to be simple to use, while still giving 48 | the implementation room for optimization. 49 | 50 | ## Installation 51 | 52 | The installation is as simple as: 53 | ```console 54 | $ cargo add rapidfuzz 55 | ``` 56 | 57 | ## Usage 58 | 59 | The following examples show the usage with the Levenshtein distance. Other metrics 60 | can be found in the [fuzz](https://docs.rs/rapidfuzz/latest/rapidfuzz/fuzz/index.html) and [distance](https://docs.rs/rapidfuzz/latest/rapidfuzz/distance/index.html) modules. 61 | 62 | ```rust 63 | use rapidfuzz::distance::levenshtein; 64 | 65 | // Perform a simple comparison using the Levenshtein distance 66 | assert_eq!( 67 | 3, 68 | levenshtein::distance("kitten".chars(), "sitting".chars()) 69 | ); 70 | 71 | // If you are sure the input strings are ASCII only, it's usually faster to operate on bytes 72 | assert_eq!( 73 | 3, 74 | levenshtein::distance("kitten".bytes(), "sitting".bytes()) 75 | ); 76 | 77 | // You can provide a score_cutoff value to filter out strings with a distance worse than 78 | // the score_cutoff 79 | assert_eq!( 80 | None, 81 | levenshtein::distance_with_args( 82 | "kitten".chars(), 83 | "sitting".chars(), 84 | &levenshtein::Args::default().score_cutoff(2) 85 | ) 86 | ); 87 | 88 | // You can provide a score_hint to tell the implementation about the expected score. 89 | // This can be used to select a more performant implementation internally, but might cause 90 | // a slowdown in cases where the distance is actually worse than the score_hint 91 | assert_eq!( 92 | 3, 93 | levenshtein::distance_with_args( 94 | "kitten".chars(), 95 | "sitting".chars(), 96 | &levenshtein::Args::default().score_hint(2) 97 | ) 98 | ); 99 | 100 | // When comparing a single string to multiple strings you can use the 101 | // provided `BatchComparators`. These can cache part of the calculation, 102 | // which can provide significant speedups 103 | let scorer = levenshtein::BatchComparator::new("kitten".chars()); 104 | assert_eq!(3, scorer.distance("sitting".chars())); 105 | assert_eq!(0, scorer.distance("kitten".chars())); 106 | ``` 107 | 108 | 109 | ## License 110 | Licensed under either of [Apache License, Version 111 | 2.0](https://github.com/rapidfuzz/rapidfuzz-rs/blob/main/LICENSE-APACHE) or [MIT License](https://github.com/rapidfuzz/rapidfuzz-rs/blob/main/LICENSE-MIT) at your option. 112 | 113 | Unless you explicitly state otherwise, any contribution intentionally submitted 114 | for inclusion in RapidFuzz by you, as defined in the Apache-2.0 license, shall be 115 | dual licensed as above, without any additional terms or conditions.
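
Beyond the examples above, the Levenshtein implementation also supports custom operation weights via ``levenshtein::WeightTable``, as exercised in `rapidfuzz-benches/benches/bench_generic_levenshtein.rs`. A minimal sketch (the weights are arbitrary illustration values):

```rust
use rapidfuzz::distance::levenshtein;

// Custom costs per edit operation through the Args builder; with all
// weights set to 1 this is equivalent to the regular Levenshtein distance.
let args = levenshtein::Args::default().weights(&levenshtein::WeightTable {
    insertion_cost: 1,
    deletion_cost: 1,
    substitution_cost: 2,
});

// "kitten" -> "sitting": 2 substitutions (cost 2 each) + 1 insertion (cost 1) = 5
assert_eq!(
    5,
    levenshtein::distance_with_args("kitten".chars(), "sitting".chars(), &args)
);
```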
116 | 117 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | ## Reporting Security Issues 2 | 3 | If you believe you have found a security vulnerability in the project, please report it to us through coordinated disclosure. 4 | 5 | **Please do not report security vulnerabilities through public GitHub issues, discussions, or pull requests.** 6 | 7 | Instead, please send an email to oss@maxbachmann.de. 8 | 9 | Please include as much of the information listed below as you can to help us better understand and resolve the issue: 10 | 11 | * The type of issue (e.g., buffer overflow, SQL injection, or cross-site scripting) 12 | * Full paths of source file(s) related to the manifestation of the issue 13 | * The location of the affected source code (tag/branch/commit or direct URL) 14 | * Any special configuration required to reproduce the issue 15 | * Step-by-step instructions to reproduce the issue 16 | * Proof-of-concept or exploit code (if possible) 17 | * Impact of the issue, including how an attacker might exploit the issue 18 | 19 | This information will help us triage your report more quickly. 20 | -------------------------------------------------------------------------------- /fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | corpus 3 | artifacts 4 | coverage -------------------------------------------------------------------------------- /fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rapidfuzz_fuzz" 3 | version = "0.0.0" 4 | publish = false 5 | edition = "2021" 6 | 7 | [package.metadata] 8 | cargo-fuzz = true 9 | 10 | [dependencies] 11 | libfuzzer-sys = "0.4.6" 12 | arbitrary = { version = "1.3.0", features = ["derive"] } 13 | 14 | rapidfuzz = { path = "../" } 15 | cargo-fuzz = "0.11.2" 16 | 17 | [profile.release] 18 | debug = 1 19 | 20 | [[bin]] 21 | name = "damerau-levenshtein" 22 | path = "fuzz_targets/damerau_levenshtein.rs" 23 | test = false 24 | doc = false 25 | 26 | [[bin]] 27 | name = "levenshtein" 28 | path = "fuzz_targets/levenshtein.rs" 29 | test = false 30 | doc = false 31 | 32 | [[bin]] 33 | name = "indel" 34 | path = "fuzz_targets/indel.rs" 35 | test = false 36 | doc = false 37 | 38 | [[bin]] 39 | name = "lcs_seq" 40 | path = "fuzz_targets/lcs_seq.rs" 41 | test = false 42 | doc = false 43 | 44 | [[bin]] 45 | name = "jaro" 46 | path = "fuzz_targets/jaro.rs" 47 | test = false 48 | doc = false 49 | 50 | [[bin]] 51 | name = "jaro-winkler" 52 | path = "fuzz_targets/jaro_winkler.rs" 53 | test = false 54 | doc = false -------------------------------------------------------------------------------- /fuzz/README.md: -------------------------------------------------------------------------------- 1 | # rapidfuzz-fuzz 2 | 3 | This directory contains fuzzers which can be used to automatically identify faults present in RapidFuzz. All the fuzzers in 4 | this directory are [grammar-aware](https://www.fuzzingbook.org/html/Grammars.html) (based on 5 | [Arbitrary](https://docs.rs/arbitrary/latest/arbitrary/)) and coverage-guided. 6 | 7 | You can run any fuzzer you wish with the following command (replacing `your-fuzzer` with a fuzzer available in 8 | fuzz_targets, e.g. 
`damerau-levenshtein`): 9 | 10 | ```bash 11 | cargo fuzz run -s none your-fuzzer 12 | ``` 13 | 14 | Note that you may wish to use a different sanitizer option (`-s`) according to what kind of issue you're looking for. 15 | Refer to the [cargo-fuzz book](https://rust-fuzz.github.io/book/cargo-fuzz.html) for details on how to select a 16 | sanitizer and other flags. 17 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/damerau_levenshtein.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | 3 | use arbitrary::Arbitrary; 4 | use libfuzzer_sys::fuzz_target; 5 | use rapidfuzz::distance::damerau_levenshtein; 6 | 7 | #[derive(Arbitrary, Debug)] 8 | pub struct Texts { 9 | pub s1: String, 10 | pub s2: String, 11 | } 12 | 13 | fn fuzz(texts: Texts) { 14 | damerau_levenshtein::distance(texts.s1.chars(), texts.s2.chars()); 15 | 16 | damerau_levenshtein::BatchComparator::new(texts.s1.chars()).distance(texts.s2.chars()); 17 | } 18 | 19 | fuzz_target!(|texts: Texts| { 20 | fuzz(texts); 21 | }); 22 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/indel.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | 3 | use arbitrary::Arbitrary; 4 | use libfuzzer_sys::fuzz_target; 5 | use rapidfuzz::distance::indel; 6 | 7 | #[derive(Arbitrary, Debug)] 8 | pub struct Texts { 9 | pub s1: String, 10 | pub s2: String, 11 | } 12 | 13 | fn fuzz(texts: Texts) { 14 | indel::distance(texts.s1.chars(), texts.s2.chars()); 15 | 16 | indel::BatchComparator::new(texts.s1.chars()).distance(texts.s2.chars()); 17 | } 18 | 19 | fuzz_target!(|texts: Texts| { 20 | fuzz(texts); 21 | }); 22 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/jaro.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | 3 | use arbitrary::Arbitrary; 4 | use libfuzzer_sys::fuzz_target; 5 | use rapidfuzz::distance::jaro; 6 | 7 | #[derive(Arbitrary, Debug)] 8 | pub struct Texts { 9 | pub s1: String, 10 | pub s2: String, 11 | } 12 | 13 | fn fuzz(texts: Texts) { 14 | jaro::distance(texts.s1.chars(), texts.s2.chars()); 15 | 16 | jaro::BatchComparator::new(texts.s1.chars()).distance(texts.s2.chars()); 17 | } 18 | 19 | fuzz_target!(|texts: Texts| { 20 | fuzz(texts); 21 | }); 22 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/jaro_winkler.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | 3 | use arbitrary::Arbitrary; 4 | use libfuzzer_sys::fuzz_target; 5 | use rapidfuzz::distance::jaro_winkler; 6 | 7 | #[derive(Arbitrary, Debug)] 8 | pub struct Texts { 9 | pub s1: String, 10 | pub s2: String, 11 | } 12 | 13 | fn fuzz(texts: Texts) { 14 | jaro_winkler::distance(texts.s1.chars(), texts.s2.chars()); 15 | 16 | jaro_winkler::BatchComparator::new(texts.s1.chars()).distance(texts.s2.chars()); 17 | } 18 | 19 | fuzz_target!(|texts: Texts| { 20 | fuzz(texts); 21 | }); 22 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/lcs_seq.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | 3 | use arbitrary::Arbitrary; 4 | use libfuzzer_sys::fuzz_target; 5 | use rapidfuzz::distance::lcs_seq; 6 | 7 | #[derive(Arbitrary, Debug)] 8 | pub struct Texts { 9 | pub s1: String, 10 | 
pub s2: String, 11 | } 12 | 13 | fn fuzz(texts: Texts) { 14 | lcs_seq::distance(texts.s1.chars(), texts.s2.chars()); 15 | 16 | lcs_seq::BatchComparator::new(texts.s1.chars()).distance(texts.s2.chars()); 17 | } 18 | 19 | fuzz_target!(|texts: Texts| { 20 | fuzz(texts); 21 | }); 22 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/levenshtein.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | 3 | use arbitrary::Arbitrary; 4 | use libfuzzer_sys::fuzz_target; 5 | use rapidfuzz::distance::levenshtein; 6 | 7 | #[derive(Arbitrary, Debug)] 8 | pub struct Texts { 9 | pub s1: String, 10 | pub s2: String, 11 | } 12 | 13 | fn fuzz(texts: Texts) { 14 | levenshtein::distance(texts.s1.chars(), texts.s2.chars()); 15 | 16 | levenshtein::BatchComparator::new(texts.s1.chars()).distance(texts.s2.chars()); 17 | } 18 | 19 | fuzz_target!(|texts: Texts| { 20 | fuzz(texts); 21 | }); 22 | -------------------------------------------------------------------------------- /rapidfuzz-benches/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rapidfuzz-benches" 3 | version = "0.1.0" 4 | description = "In-tree benchmarks for the RapidFuzz project" 5 | authors = ["maxbachmann "] 6 | edition = "2021" 7 | publish = false 8 | 9 | [dependencies] 10 | rapidfuzz = { path = "../" } 11 | 12 | [dev-dependencies] 13 | criterion = { version = "0.5.1", features = ["html_reports"] } 14 | rand = "0.8.5" 15 | strsim = "0.10.0" 16 | 17 | [[bench]] 18 | name = "bench_generic_levenshtein" 19 | harness = false 20 | 21 | [[bench]] 22 | name = "bench_jaro_winkler" 23 | harness = false 24 | 25 | [[bench]] 26 | name = "bench_jaro" 27 | harness = false 28 | 29 | [[bench]] 30 | name = "bench_levenshtein" 31 | harness = false 32 | 33 | [[bench]] 34 | name = "bench_osa" 35 | harness = false 36 | 37 | [[bench]] 38 | name = "bench_damerau_levenshtein" 39 | harness = false 40 | 41 | [[bench]] 42 | name = "bench_lcs_seq" 43 | harness = false 44 | 45 | [[bench]] 46 | name = "bench_indel" 47 | harness = false 48 | 49 | -------------------------------------------------------------------------------- /rapidfuzz-benches/benches/bench_damerau_levenshtein.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; 2 | use rand::{distributions::Alphanumeric, Rng}; 3 | 4 | use rapidfuzz::distance; 5 | 6 | use std::str::Bytes; 7 | 8 | fn generate(len: usize) -> String { 9 | rand::thread_rng() 10 | .sample_iter(&Alphanumeric) 11 | .take(len) 12 | .map(char::from) 13 | .collect() 14 | } 15 | 16 | struct StringWrapper<'a>(&'a str); 17 | 18 | impl<'a, 'b> IntoIterator for &'a StringWrapper<'b> { 19 | type Item = u8; 20 | type IntoIter = Bytes<'b>; 21 | 22 | fn into_iter(self) -> Self::IntoIter { 23 | self.0.bytes() 24 | } 25 | } 26 | 27 | fn benchmark(c: &mut Criterion) { 28 | let mut group = c.benchmark_group("Damerau Levenshtein"); 29 | 30 | for i in (2..128).step_by(2) { 31 | let s1 = generate(i); 32 | let s2 = generate(i); 33 | 34 | group.bench_with_input(BenchmarkId::new("rapidfuzz", i), &(&s1, &s2), |b, val| { 35 | b.iter(|| { 36 | black_box(distance::damerau_levenshtein::distance( 37 | val.0.bytes(), 38 | val.1.bytes(), 39 | )); 40 | }) 41 | }); 42 | let (x, y): (Vec<_>, Vec<_>) = (s1.bytes().collect(), s2.bytes().collect()); 43 | 
group.bench_with_input(BenchmarkId::new("strsim", i), &(&x, &y), |b, val| { 44 | b.iter(|| { 45 | black_box(strsim::generic_damerau_levenshtein( 46 | val.0.as_slice(), 47 | val.1.as_slice(), 48 | )); 49 | }) 50 | }); 51 | } 52 | 53 | group.finish(); 54 | } 55 | 56 | criterion_group!(benches, benchmark); 57 | criterion_main!(benches); 58 | -------------------------------------------------------------------------------- /rapidfuzz-benches/benches/bench_generic_levenshtein.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; 2 | use rand::{distributions::Alphanumeric, Rng}; 3 | 4 | use rapidfuzz::distance; 5 | 6 | use std::str::Bytes; 7 | 8 | fn generate(len: usize) -> String { 9 | rand::thread_rng() 10 | .sample_iter(&Alphanumeric) 11 | .take(len) 12 | .map(char::from) 13 | .collect() 14 | } 15 | 16 | struct StringWrapper<'a>(&'a str); 17 | 18 | impl<'a, 'b> IntoIterator for &'a StringWrapper<'b> { 19 | type Item = u8; 20 | type IntoIter = Bytes<'b>; 21 | 22 | fn into_iter(self) -> Self::IntoIter { 23 | self.0.bytes() 24 | } 25 | } 26 | 27 | fn benchmark(c: &mut Criterion) { 28 | let mut group = c.benchmark_group("Generic Levenshtein"); 29 | 30 | let lens = (2..128).step_by(2); 31 | for i in lens { 32 | let s1 = generate(i); 33 | let s2 = generate(i); 34 | let args = 35 | distance::levenshtein::Args::default().weights(&distance::levenshtein::WeightTable { 36 | insertion_cost: 1, 37 | deletion_cost: 2, 38 | substitution_cost: 3, 39 | }); 40 | 41 | group.bench_with_input(BenchmarkId::new("rapidfuzz", i), &(&s1, &s2), |b, val| { 42 | b.iter(|| { 43 | black_box(distance::levenshtein::distance_with_args( 44 | val.0.bytes(), 45 | val.1.bytes(), 46 | &args, 47 | )); 48 | }) 49 | }); 50 | } 51 | 52 | group.finish(); 53 | } 54 | 55 | criterion_group!(benches, benchmark); 56 | criterion_main!(benches); 57 | -------------------------------------------------------------------------------- /rapidfuzz-benches/benches/bench_indel.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; 2 | use rand::{distributions::Alphanumeric, Rng}; 3 | 4 | use rapidfuzz::distance; 5 | 6 | use std::str::Bytes; 7 | 8 | fn generate(len: usize) -> String { 9 | rand::thread_rng() 10 | .sample_iter(&Alphanumeric) 11 | .take(len) 12 | .map(char::from) 13 | .collect() 14 | } 15 | 16 | struct StringWrapper<'a>(&'a str); 17 | 18 | impl<'a, 'b> IntoIterator for &'a StringWrapper<'b> { 19 | type Item = u8; 20 | type IntoIter = Bytes<'b>; 21 | 22 | fn into_iter(self) -> Self::IntoIter { 23 | self.0.bytes() 24 | } 25 | } 26 | 27 | fn benchmark(c: &mut Criterion) { 28 | let mut group = c.benchmark_group("Indel"); 29 | 30 | for i in (2..128).step_by(2) { 31 | let s1 = generate(i); 32 | let s2 = generate(i); 33 | 34 | group.bench_with_input(BenchmarkId::new("rapidfuzz", i), &(&s1, &s2), |b, val| { 35 | b.iter(|| { 36 | black_box(distance::indel::distance(val.0.bytes(), val.1.bytes())); 37 | }) 38 | }); 39 | 40 | let cached = distance::indel::BatchComparator::new(s1.bytes()); 41 | group.bench_with_input( 42 | BenchmarkId::new("rapidfuzz (BatchComparator)", i), 43 | &(&cached, &s2), 44 | |b, val| { 45 | b.iter(|| { 46 | black_box(cached.distance(val.1.bytes())); 47 | }) 48 | }, 49 | ); 50 | } 51 | 52 | group.finish(); 53 | } 54 | 55 | criterion_group!(benches, benchmark); 56 | criterion_main!(benches); 57 
| -------------------------------------------------------------------------------- /rapidfuzz-benches/benches/bench_jaro.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; 2 | use rand::{distributions::Alphanumeric, Rng}; 3 | 4 | use rapidfuzz::distance; 5 | 6 | use std::str::Bytes; 7 | 8 | fn generate(len: usize) -> String { 9 | rand::thread_rng() 10 | .sample_iter(&Alphanumeric) 11 | .take(len) 12 | .map(char::from) 13 | .collect() 14 | } 15 | 16 | struct StringWrapper<'a>(&'a str); 17 | 18 | impl<'a, 'b> IntoIterator for &'a StringWrapper<'b> { 19 | type Item = u8; 20 | type IntoIter = Bytes<'b>; 21 | 22 | fn into_iter(self) -> Self::IntoIter { 23 | self.0.bytes() 24 | } 25 | } 26 | 27 | fn benchmark(c: &mut Criterion) { 28 | let mut group = c.benchmark_group("Jaro"); 29 | 30 | for i in (2..128).step_by(2) { 31 | let s1 = generate(i); 32 | let s2 = generate(i); 33 | 34 | group.bench_with_input(BenchmarkId::new("rapidfuzz", i), &(&s1, &s2), |b, val| { 35 | b.iter(|| { 36 | black_box(distance::jaro::similarity(val.0.bytes(), val.1.bytes())); 37 | }) 38 | }); 39 | 40 | let cached = distance::jaro::BatchComparator::new(s1.bytes()); 41 | group.bench_with_input( 42 | BenchmarkId::new("rapidfuzz (BatchComparator)", i), 43 | &(&cached, &s2), 44 | |b, val| { 45 | b.iter(|| { 46 | black_box(cached.similarity(val.1.bytes())); 47 | }) 48 | }, 49 | ); 50 | 51 | group.bench_with_input(BenchmarkId::new("strsim", i), &(&s1, &s2), |b, val| { 52 | b.iter(|| { 53 | black_box(strsim::generic_jaro( 54 | &StringWrapper(val.0), 55 | &StringWrapper(val.1), 56 | )); 57 | }) 58 | }); 59 | } 60 | 61 | group.finish(); 62 | } 63 | 64 | criterion_group!(benches, benchmark); 65 | criterion_main!(benches); 66 | -------------------------------------------------------------------------------- /rapidfuzz-benches/benches/bench_jaro_winkler.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; 2 | use rand::{distributions::Alphanumeric, Rng}; 3 | 4 | use rapidfuzz::distance; 5 | 6 | use std::str::Bytes; 7 | 8 | fn generate(len: usize) -> String { 9 | rand::thread_rng() 10 | .sample_iter(&Alphanumeric) 11 | .take(len) 12 | .map(char::from) 13 | .collect() 14 | } 15 | 16 | struct StringWrapper<'a>(&'a str); 17 | 18 | impl<'a, 'b> IntoIterator for &'a StringWrapper<'b> { 19 | type Item = u8; 20 | type IntoIter = Bytes<'b>; 21 | 22 | fn into_iter(self) -> Self::IntoIter { 23 | self.0.bytes() 24 | } 25 | } 26 | 27 | fn benchmark(c: &mut Criterion) { 28 | let mut group = c.benchmark_group("JaroWinkler"); 29 | 30 | for i in (2..128).step_by(2) { 31 | let s1 = generate(i); 32 | let s2 = generate(i); 33 | 34 | group.bench_with_input(BenchmarkId::new("rapidfuzz", i), &(&s1, &s2), |b, val| { 35 | b.iter(|| { 36 | black_box(distance::jaro_winkler::similarity( 37 | val.0.bytes(), 38 | val.1.bytes(), 39 | )); 40 | }) 41 | }); 42 | 43 | let cached = distance::jaro_winkler::BatchComparator::new(s1.bytes()); 44 | group.bench_with_input( 45 | BenchmarkId::new("rapidfuzz (BatchComparator)", i), 46 | &(&cached, &s2), 47 | |b, val| { 48 | b.iter(|| { 49 | black_box(cached.similarity(val.1.bytes())); 50 | }) 51 | }, 52 | ); 53 | 54 | group.bench_with_input(BenchmarkId::new("strsim", i), &(&s1, &s2), |b, val| { 55 | b.iter(|| { 56 | black_box(strsim::generic_jaro_winkler( 57 | &StringWrapper(val.0), 58 | 
&StringWrapper(val.1), 59 | )); 60 | }) 61 | }); 62 | } 63 | 64 | group.finish(); 65 | } 66 | 67 | criterion_group!(benches, benchmark); 68 | criterion_main!(benches); 69 | -------------------------------------------------------------------------------- /rapidfuzz-benches/benches/bench_lcs_seq.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; 2 | use rand::{distributions::Alphanumeric, Rng}; 3 | 4 | use rapidfuzz::distance; 5 | 6 | use std::str::Bytes; 7 | 8 | fn generate(len: usize) -> String { 9 | rand::thread_rng() 10 | .sample_iter(&Alphanumeric) 11 | .take(len) 12 | .map(char::from) 13 | .collect() 14 | } 15 | 16 | struct StringWrapper<'a>(&'a str); 17 | 18 | impl<'a, 'b> IntoIterator for &'a StringWrapper<'b> { 19 | type Item = u8; 20 | type IntoIter = Bytes<'b>; 21 | 22 | fn into_iter(self) -> Self::IntoIter { 23 | self.0.bytes() 24 | } 25 | } 26 | 27 | fn benchmark(c: &mut Criterion) { 28 | let mut group = c.benchmark_group("Longest Common Subsequence"); 29 | 30 | for i in (2..128).step_by(2) { 31 | let s1 = generate(i); 32 | let s2 = generate(i); 33 | 34 | group.bench_with_input(BenchmarkId::new("rapidfuzz", i), &(&s1, &s2), |b, val| { 35 | b.iter(|| { 36 | black_box(distance::lcs_seq::similarity(val.0.bytes(), val.1.bytes())); 37 | }) 38 | }); 39 | 40 | let cached = distance::lcs_seq::BatchComparator::new(s1.bytes()); 41 | group.bench_with_input( 42 | BenchmarkId::new("rapidfuzz (BatchComparator)", i), 43 | &(&cached, &s2), 44 | |b, val| { 45 | b.iter(|| { 46 | black_box(cached.similarity(val.1.bytes())); 47 | }) 48 | }, 49 | ); 50 | } 51 | 52 | group.finish(); 53 | } 54 | 55 | criterion_group!(benches, benchmark); 56 | criterion_main!(benches); 57 | -------------------------------------------------------------------------------- /rapidfuzz-benches/benches/bench_levenshtein.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; 2 | use rand::{distributions::Alphanumeric, Rng}; 3 | 4 | use rapidfuzz::distance; 5 | 6 | use std::str::Bytes; 7 | 8 | fn generate(len: usize) -> String { 9 | rand::thread_rng() 10 | .sample_iter(&Alphanumeric) 11 | .take(len) 12 | .map(char::from) 13 | .collect() 14 | } 15 | 16 | struct StringWrapper<'a>(&'a str); 17 | 18 | impl<'a, 'b> IntoIterator for &'a StringWrapper<'b> { 19 | type Item = u8; 20 | type IntoIter = Bytes<'b>; 21 | 22 | fn into_iter(self) -> Self::IntoIter { 23 | self.0.bytes() 24 | } 25 | } 26 | 27 | fn benchmark(c: &mut Criterion) { 28 | let mut group = c.benchmark_group("Levenshtein"); 29 | 30 | for i in (2..128).step_by(2) { 31 | let s1 = generate(i); 32 | let s2 = generate(i); 33 | 34 | group.bench_with_input(BenchmarkId::new("rapidfuzz", i), &(&s1, &s2), |b, val| { 35 | b.iter(|| { 36 | black_box(distance::levenshtein::distance( 37 | val.0.bytes(), 38 | val.1.bytes(), 39 | )); 40 | }) 41 | }); 42 | group.bench_with_input(BenchmarkId::new("strsim", i), &(&s1, &s2), |b, val| { 43 | b.iter(|| { 44 | black_box(strsim::generic_levenshtein( 45 | &StringWrapper(val.0), 46 | &StringWrapper(val.1), 47 | )); 48 | }) 49 | }); 50 | 51 | let cached = distance::levenshtein::BatchComparator::new(s1.bytes()); 52 | group.bench_with_input( 53 | BenchmarkId::new("rapidfuzz (BatchComparator)", i), 54 | &(&cached, &s2), 55 | |b, val| { 56 | b.iter(|| { 57 | black_box(cached.distance(val.1.bytes())); 58 | }) 59 | 
}, 60 | ); 61 | } 62 | 63 | group.finish(); 64 | } 65 | 66 | criterion_group!(benches, benchmark); 67 | criterion_main!(benches); 68 | -------------------------------------------------------------------------------- /rapidfuzz-benches/benches/bench_osa.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; 2 | use rand::{distributions::Alphanumeric, Rng}; 3 | 4 | use rapidfuzz::distance; 5 | 6 | use std::str::Bytes; 7 | 8 | fn generate(len: usize) -> String { 9 | rand::thread_rng() 10 | .sample_iter(&Alphanumeric) 11 | .take(len) 12 | .map(char::from) 13 | .collect() 14 | } 15 | 16 | struct StringWrapper<'a>(&'a str); 17 | 18 | impl<'a, 'b> IntoIterator for &'a StringWrapper<'b> { 19 | type Item = u8; 20 | type IntoIter = Bytes<'b>; 21 | 22 | fn into_iter(self) -> Self::IntoIter { 23 | self.0.bytes() 24 | } 25 | } 26 | 27 | fn benchmark(c: &mut Criterion) { 28 | let mut group = c.benchmark_group("OSA"); 29 | 30 | for i in (2..128).step_by(2) { 31 | let s1 = generate(i); 32 | let s2 = generate(i); 33 | 34 | group.bench_with_input(BenchmarkId::new("rapidfuzz", i), &(&s1, &s2), |b, val| { 35 | b.iter(|| { 36 | black_box(distance::osa::distance(val.0.chars(), val.1.chars())); 37 | }) 38 | }); 39 | 40 | let cached = distance::osa::BatchComparator::new(s1.chars()); 41 | group.bench_with_input( 42 | BenchmarkId::new("rapidfuzz (BatchComparator)", i), 43 | &(&cached, &s2), 44 | |b, val| { 45 | b.iter(|| { 46 | black_box(cached.distance(val.1.chars())); 47 | }) 48 | }, 49 | ); 50 | 51 | group.bench_with_input(BenchmarkId::new("strsim", i), &(&s1, &s2), |b, val| { 52 | b.iter(|| { 53 | black_box(strsim::osa_distance(val.0, val.1)); 54 | }) 55 | }); 56 | } 57 | 58 | group.finish(); 59 | } 60 | 61 | criterion_group!(benches, benchmark); 62 | criterion_main!(benches); 63 | -------------------------------------------------------------------------------- /rapidfuzz-benches/results/damerau_levenshtein.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Damerau Levenshtein: Comparison 5 | 6 | 7 | Average time (µs) 8 | 9 | 10 | Input 11 | 12 | 13 | 14 | 20.0 15 | 16 | 17 | 18 | 40.0 19 | 20 | 21 | 22 | 60.0 23 | 24 | 25 | 26 | 80.0 27 | 28 | 29 | 30 | 100.0 31 | 32 | 33 | 34 | 120.0 35 | 36 | 37 | 38 | 140.0 39 | 40 | 41 | 42 | 160.0 43 | 44 | 45 | 46 | 180.0 47 | 48 | 49 | 50 | 200.0 51 | 52 | 53 | 54 | 55 | 20.0 56 | 57 | 58 | 59 | 40.0 60 | 61 | 62 | 63 | 60.0 64 | 65 | 66 | 67 | 80.0 68 | 69 | 70 | 71 | 100.0 72 | 73 | 74 | 75 | 120.0 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | rapidfuzz 208 | 209 | 210 | strsim 211 | 212 | 213 | 214 | 215 | -------------------------------------------------------------------------------- 
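
The SVG files under `rapidfuzz-benches/results/` (this one and the ones that follow) are comparison plots produced from the Criterion benchmarks above. Assuming the standard Criterion workflow (the exact SVG export step is not part of this tree), the underlying measurements can be regenerated with:

```console
$ cd rapidfuzz-benches
$ cargo bench
```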
/rapidfuzz-benches/results/generic_levenshtein.svg: -------------------------------------------------------------------------------- [benchmark plot "Generic Levenshtein: Comparison", average time (µs) vs. input length; series: rapidfuzz] -------------------------------------------------------------------------------- /rapidfuzz-benches/results/indel.svg: -------------------------------------------------------------------------------- [benchmark plot "Indel: Comparison", average time (ns) vs. input length; series: rapidfuzz, rapidfuzz (BatchComparator)] -------------------------------------------------------------------------------- /rapidfuzz-benches/results/longest_common_subsequence.svg: -------------------------------------------------------------------------------- [benchmark plot "Longest Common Subsequence: Comparison", average time (ns) vs. input length; series: rapidfuzz, rapidfuzz (BatchComparator)] -------------------------------------------------------------------------------- /src/common.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | #[derive(Default, Copy, Clone)] 4 | pub struct NoScoreCutoff; 5 | #[derive(Default, Copy, Clone)] 6 | pub struct WithScoreCutoff<T>(pub T); 7 | 8 | pub trait DistanceCutoff<T> 9 | where 10 | T: Copy, 11 | { 12 | type Output: Copy + Into<Option<T>> + PartialEq + Debug; 13 | 14 | fn cutoff(&self) -> Option<T>; 15 | fn score(&self, raw: T) -> Self::Output; 16 | } 17 | 18 | impl<T> DistanceCutoff<T> for NoScoreCutoff 19 | where 20 | T: Copy + PartialEq + Debug, 21 | { 22 | type Output = T; 23 | 24 | fn cutoff(&self) -> Option<T> { 25 | None 26 | } 27 | 28 | fn score(&self, raw: T) -> Self::Output { 29 | raw 30 | } 31 | } 32 | 33 | impl<T> DistanceCutoff<T> for WithScoreCutoff<T> 34 | where 35 | T: Copy + PartialOrd + Debug, 36 | { 37 | type Output = Option<T>; 38 | 39 | fn cutoff(&self) -> Option<T> { 40 | Some(self.0) 41 | } 42 | 43 | fn score(&self, raw: T) -> Self::Output { 44 | (raw <= self.0).then_some(raw) 45 | } 46 | } 47 | 48 | pub trait SimilarityCutoff<T> 49 | where 50 | T: Copy, 51 | { 52 | type Output: Copy + Into<Option<T>> + PartialEq + Debug; 53 | 54 | fn cutoff(&self) -> Option<T>; 55 | fn score(&self, raw: T) -> Self::Output; 56 | } 57 | 58 | impl<T> SimilarityCutoff<T> for NoScoreCutoff 59 | where 60 | T: Copy + PartialEq + Debug, 61 | { 62 | type Output = T; 63 | 64 | fn cutoff(&self) -> Option<T> { 65 | None 66 | } 67 | 68 | fn score(&self, raw: T) -> Self::Output { 69 | raw 70 | } 71 | } 72 | 73 | impl<T> SimilarityCutoff<T> for WithScoreCutoff<T> 74 | where 75 | T: Copy + PartialOrd + Debug, 76 | { 77 | type Output = Option<T>; 78 | 79 | fn cutoff(&self) -> Option<T> { 80 | Some(self.0) 81 | } 82 | 83 | fn score(&self, raw: T) -> Self::Output { 84 | (raw >= self.0).then_some(raw) 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/details.rs: -------------------------------------------------------------------------------- 1 | pub mod common; 2 | pub mod distance; 3 | pub mod growing_hashmap; 4 | pub mod intrinsics; 5 | pub mod matrix; 6 | pub mod pattern_match_vector; 7 | -------------------------------------------------------------------------------- /src/details/common.rs: -------------------------------------------------------------------------------- 1 | use crate::{Hash, HashableChar}; 2 | use std::iter::{Skip, Take}; 3 | 4 | pub fn norm_sim_to_norm_dist(score_cutoff: f64) -> f64 { 5 | let imprecision = 0.00001; 6 | (1.0 - score_cutoff + imprecision).min(1.0) 7 | } 8 | 9 | macro_rules! 
impl_hashable_char { 10 | ($base_type:ty, $kind:tt $(, $t:ty)*) => { 11 | impl HashableChar for $base_type { 12 | #[inline] 13 | fn hash_char(&self) -> Hash 14 | { 15 | Hash::$kind(*self $(as $t)*) 16 | } 17 | } 18 | 19 | impl HashableChar for &$base_type { 20 | #[inline] 21 | fn hash_char(&self) -> Hash 22 | { 23 | Hash::$kind(**self $(as $t)*) 24 | } 25 | } 26 | } 27 | } 28 | 29 | impl_hashable_char!(char, UNSIGNED, u32, u64); 30 | impl_hashable_char!(i8, SIGNED, i64); 31 | impl_hashable_char!(i16, SIGNED, i64); 32 | impl_hashable_char!(i32, SIGNED, i64); 33 | impl_hashable_char!(i64, SIGNED, i64); 34 | impl_hashable_char!(u8, UNSIGNED, u64); 35 | impl_hashable_char!(u16, UNSIGNED, u64); 36 | impl_hashable_char!(u32, UNSIGNED, u64); 37 | impl_hashable_char!(u64, UNSIGNED, u64); 38 | 39 | pub fn find_common_prefix(s1: Iter1, s2: Iter2) -> usize 40 | where 41 | Iter1: Iterator + Clone, 42 | Iter2: Iterator + Clone, 43 | Iter1::Item: PartialEq, 44 | Iter2::Item: PartialEq, 45 | { 46 | s1.zip(s2) 47 | .take_while(|(a_char, b_char)| a_char == b_char) 48 | .count() 49 | } 50 | 51 | pub fn find_common_suffix(s1: Iter1, s2: Iter2) -> usize 52 | where 53 | Iter1: DoubleEndedIterator + Clone, 54 | Iter2: DoubleEndedIterator + Clone, 55 | Iter1::Item: PartialEq, 56 | Iter2::Item: PartialEq, 57 | { 58 | s1.rev() 59 | .zip(s2.rev()) 60 | .take_while(|(a_char, b_char)| a_char == b_char) 61 | .count() 62 | } 63 | 64 | pub struct RemovedAffix 65 | where 66 | Iter1: DoubleEndedIterator + Clone, 67 | Iter2: DoubleEndedIterator + Clone, 68 | Iter1::Item: PartialEq, 69 | Iter2::Item: PartialEq, 70 | { 71 | pub s1: Skip>, 72 | pub len1: usize, 73 | pub s2: Skip>, 74 | pub len2: usize, 75 | pub prefix_len: usize, 76 | pub suffix_len: usize, 77 | } 78 | 79 | pub fn remove_common_affix( 80 | s1: Iter1, 81 | mut len1: usize, 82 | s2: Iter2, 83 | mut len2: usize, 84 | ) -> RemovedAffix 85 | where 86 | Iter1: DoubleEndedIterator + Clone, 87 | Iter2: DoubleEndedIterator + Clone, 88 | Iter1::Item: PartialEq + HashableChar, 89 | Iter2::Item: PartialEq + HashableChar, 90 | { 91 | let suffix_len = find_common_suffix(s1.clone(), s2.clone()); 92 | let s1_iter_no_suffix = s1.take(len1 - suffix_len); 93 | let s2_iter_no_suffix = s2.take(len2 - suffix_len); 94 | let prefix_len = find_common_prefix(s1_iter_no_suffix.clone(), s2_iter_no_suffix.clone()); 95 | let s1_iter = s1_iter_no_suffix.skip(prefix_len); 96 | let s2_iter = s2_iter_no_suffix.skip(prefix_len); 97 | len1 -= prefix_len + suffix_len; 98 | len2 -= prefix_len + suffix_len; 99 | 100 | RemovedAffix { 101 | s1: s1_iter, 102 | len1, 103 | s2: s2_iter, 104 | len2, 105 | prefix_len, 106 | suffix_len, 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/details/distance.rs: -------------------------------------------------------------------------------- 1 | use crate::details::common::norm_sim_to_norm_dist; 2 | use crate::HashableChar; 3 | 4 | pub trait MetricUsize2 { 5 | fn maximum(&self, len1: usize, len2: usize) -> usize; 6 | 7 | fn _distance( 8 | &self, 9 | s1: Iter1, 10 | len1: usize, 11 | s2: Iter2, 12 | len2: usize, 13 | score_cutoff: Option, 14 | score_hint: Option, 15 | ) -> Option 16 | where 17 | Iter1: DoubleEndedIterator + Clone, 18 | Iter2: DoubleEndedIterator + Clone, 19 | Iter1::Item: PartialEq + HashableChar + Copy, 20 | Iter2::Item: PartialEq + HashableChar + Copy, 21 | { 22 | let maximum = self.maximum(len1, len2); 23 | 24 | let cutoff_similarity = score_cutoff.map(|x| if maximum >= x { maximum - x 
} else { 0 }); 25 | let hint_similarity = score_hint.map(|x| if maximum >= x { maximum - x } else { 0 }); 26 | 27 | let sim = self._similarity(s1, len1, s2, len2, cutoff_similarity, hint_similarity)?; 28 | let dist = maximum - sim; 29 | 30 | if let Some(cutoff) = score_cutoff { 31 | if dist > cutoff { 32 | return None; 33 | } 34 | } 35 | Some(dist) 36 | } 37 | 38 | fn _similarity( 39 | &self, 40 | s1: Iter1, 41 | len1: usize, 42 | s2: Iter2, 43 | len2: usize, 44 | score_cutoff: Option, 45 | mut score_hint: Option, 46 | ) -> Option 47 | where 48 | Iter1: DoubleEndedIterator + Clone, 49 | Iter2: DoubleEndedIterator + Clone, 50 | Iter1::Item: PartialEq + HashableChar + Copy, 51 | Iter2::Item: PartialEq + HashableChar + Copy, 52 | { 53 | let maximum = self.maximum(len1, len2); 54 | if let Some(cutoff) = score_cutoff { 55 | if maximum < cutoff { 56 | return None; 57 | } 58 | 59 | if let Some(hint) = score_hint { 60 | score_hint = Some(hint.min(cutoff)); 61 | } 62 | } 63 | 64 | let cutoff_distance = score_cutoff.map(|x| maximum - x); 65 | let hint_distance = score_hint.map(|x| maximum - x); 66 | let dist = self._distance(s1, len1, s2, len2, cutoff_distance, hint_distance)?; 67 | let sim = maximum - dist; 68 | if let Some(cutoff) = score_cutoff { 69 | if sim < cutoff { 70 | return None; 71 | } 72 | } 73 | Some(sim) 74 | } 75 | 76 | fn _normalized_distance( 77 | &self, 78 | s1: Iter1, 79 | len1: usize, 80 | s2: Iter2, 81 | len2: usize, 82 | mut score_cutoff: Option, 83 | score_hint: Option, 84 | ) -> Option 85 | where 86 | Iter1: DoubleEndedIterator + Clone, 87 | Iter2: DoubleEndedIterator + Clone, 88 | Iter1::Item: PartialEq + HashableChar + Copy, 89 | Iter2::Item: PartialEq + HashableChar + Copy, 90 | { 91 | let maximum = self.maximum(len1, len2); 92 | 93 | let cutoff_distance; 94 | if let Some(mut cutoff) = score_cutoff { 95 | cutoff = cutoff.clamp(0.0, 1.0); 96 | score_cutoff = Some(cutoff); 97 | cutoff_distance = Some((maximum as f64 * cutoff).ceil() as usize); 98 | } else { 99 | cutoff_distance = None; 100 | } 101 | 102 | let hint_distance; 103 | if let Some(mut cutoff) = score_hint { 104 | cutoff = cutoff.clamp(0.0, 1.0); 105 | hint_distance = Some((maximum as f64 * cutoff).ceil() as usize); 106 | } else { 107 | hint_distance = None; 108 | } 109 | 110 | let dist = self._distance(s1, len1, s2, len2, cutoff_distance, hint_distance)?; 111 | let norm_dist = if maximum == 0 { 112 | 0.0 113 | } else { 114 | dist as f64 / maximum as f64 115 | }; 116 | if let Some(cutoff) = score_cutoff { 117 | if norm_dist > cutoff { 118 | return None; 119 | } 120 | } 121 | Some(norm_dist) 122 | } 123 | 124 | fn _normalized_similarity( 125 | &self, 126 | s1: Iter1, 127 | len1: usize, 128 | s2: Iter2, 129 | len2: usize, 130 | score_cutoff: Option, 131 | score_hint: Option, 132 | ) -> Option 133 | where 134 | Iter1: DoubleEndedIterator + Clone, 135 | Iter2: DoubleEndedIterator + Clone, 136 | Iter1::Item: PartialEq + HashableChar + Copy, 137 | Iter2::Item: PartialEq + HashableChar + Copy, 138 | { 139 | let cutoff_score = score_cutoff.map(norm_sim_to_norm_dist); 140 | let hint_score = score_hint.map(norm_sim_to_norm_dist); 141 | 142 | let norm_dist = self._normalized_distance(s1, len1, s2, len2, cutoff_score, hint_score)?; 143 | let norm_sim = 1.0 - norm_dist; 144 | 145 | if let Some(cutoff) = score_cutoff { 146 | if norm_sim < cutoff { 147 | return None; 148 | } 149 | } 150 | Some(norm_sim) 151 | } 152 | } 153 | 154 | pub trait MetricUsize { 155 | fn maximum(&self, len1: usize, len2: usize) -> usize; 156 | 157 | fn 
_distance( 158 | &self, 159 | s1: Iter1, 160 | len1: usize, 161 | s2: Iter2, 162 | len2: usize, 163 | score_cutoff: Option, 164 | score_hint: Option, 165 | ) -> usize 166 | where 167 | Iter1: DoubleEndedIterator + Clone, 168 | Iter2: DoubleEndedIterator + Clone, 169 | Iter1::Item: PartialEq + HashableChar + Copy, 170 | Iter2::Item: PartialEq + HashableChar + Copy, 171 | { 172 | let maximum = self.maximum(len1, len2); 173 | 174 | let cutoff_similarity = score_cutoff.map(|x| if maximum >= x { maximum - x } else { 0 }); 175 | let hint_similarity = score_hint.map(|x| if maximum >= x { maximum - x } else { 0 }); 176 | 177 | let sim = self._similarity(s1, len1, s2, len2, cutoff_similarity, hint_similarity); 178 | maximum - sim 179 | } 180 | 181 | fn _similarity( 182 | &self, 183 | s1: Iter1, 184 | len1: usize, 185 | s2: Iter2, 186 | len2: usize, 187 | score_cutoff: Option, 188 | mut score_hint: Option, 189 | ) -> usize 190 | where 191 | Iter1: DoubleEndedIterator + Clone, 192 | Iter2: DoubleEndedIterator + Clone, 193 | Iter1::Item: PartialEq + HashableChar + Copy, 194 | Iter2::Item: PartialEq + HashableChar + Copy, 195 | { 196 | let maximum = self.maximum(len1, len2); 197 | if let Some(cutoff) = score_cutoff { 198 | if cutoff > maximum { 199 | return maximum; 200 | } 201 | 202 | if let Some(hint) = score_hint { 203 | score_hint = Some(hint.min(cutoff)); 204 | } 205 | } 206 | 207 | let cutoff_distance = score_cutoff.map(|x| maximum - x); 208 | let hint_distance = score_hint.map(|x| maximum - x); 209 | let dist = self._distance(s1, len1, s2, len2, cutoff_distance, hint_distance); 210 | maximum - dist 211 | } 212 | 213 | fn _normalized_distance( 214 | &self, 215 | s1: Iter1, 216 | len1: usize, 217 | s2: Iter2, 218 | len2: usize, 219 | score_cutoff: Option, 220 | score_hint: Option, 221 | ) -> f64 222 | where 223 | Iter1: DoubleEndedIterator + Clone, 224 | Iter2: DoubleEndedIterator + Clone, 225 | Iter1::Item: PartialEq + HashableChar + Copy, 226 | Iter2::Item: PartialEq + HashableChar + Copy, 227 | { 228 | let maximum = self.maximum(len1, len2); 229 | 230 | let cutoff_distance; 231 | if let Some(mut cutoff) = score_cutoff { 232 | cutoff = cutoff.clamp(0.0, 1.0); 233 | cutoff_distance = Some((maximum as f64 * cutoff).ceil() as usize); 234 | } else { 235 | cutoff_distance = None; 236 | } 237 | 238 | let hint_distance; 239 | if let Some(mut cutoff) = score_hint { 240 | cutoff = cutoff.clamp(0.0, 1.0); 241 | hint_distance = Some((maximum as f64 * cutoff).ceil() as usize); 242 | } else { 243 | hint_distance = None; 244 | } 245 | 246 | let dist = self._distance(s1, len1, s2, len2, cutoff_distance, hint_distance); 247 | if maximum == 0 { 248 | 0.0 249 | } else { 250 | dist as f64 / maximum as f64 251 | } 252 | } 253 | 254 | fn _normalized_similarity( 255 | &self, 256 | s1: Iter1, 257 | len1: usize, 258 | s2: Iter2, 259 | len2: usize, 260 | score_cutoff: Option, 261 | score_hint: Option, 262 | ) -> f64 263 | where 264 | Iter1: DoubleEndedIterator + Clone, 265 | Iter2: DoubleEndedIterator + Clone, 266 | Iter1::Item: PartialEq + HashableChar + Copy, 267 | Iter2::Item: PartialEq + HashableChar + Copy, 268 | { 269 | let cutoff_score = score_cutoff.map(norm_sim_to_norm_dist); 270 | let hint_score = score_hint.map(norm_sim_to_norm_dist); 271 | 272 | let norm_dist = self._normalized_distance(s1, len1, s2, len2, cutoff_score, hint_score); 273 | 1.0 - norm_dist 274 | } 275 | } 276 | 277 | pub trait Metricf64 { 278 | fn maximum(&self, len1: usize, len2: usize) -> f64; 279 | 280 | fn _distance( 281 | &self, 282 | s1: 
Iter1, 283 | len1: usize, 284 | s2: Iter2, 285 | len2: usize, 286 | score_cutoff: Option, 287 | score_hint: Option, 288 | ) -> f64 289 | where 290 | Iter1: DoubleEndedIterator + Clone, 291 | Iter2: DoubleEndedIterator + Clone, 292 | Iter1::Item: PartialEq + HashableChar + Copy, 293 | Iter2::Item: PartialEq + HashableChar + Copy, 294 | { 295 | let maximum = self.maximum(len1, len2); 296 | 297 | let cutoff_similarity = score_cutoff.map(|x| if maximum >= x { maximum - x } else { 0.0 }); 298 | let hint_similarity = score_hint.map(|x| if maximum >= x { maximum - x } else { 0.0 }); 299 | 300 | let sim = self._similarity(s1, len1, s2, len2, cutoff_similarity, hint_similarity); 301 | maximum - sim 302 | } 303 | 304 | fn _similarity( 305 | &self, 306 | s1: Iter1, 307 | len1: usize, 308 | s2: Iter2, 309 | len2: usize, 310 | score_cutoff: Option, 311 | mut score_hint: Option, 312 | ) -> f64 313 | where 314 | Iter1: DoubleEndedIterator + Clone, 315 | Iter2: DoubleEndedIterator + Clone, 316 | Iter1::Item: PartialEq + HashableChar + Copy, 317 | Iter2::Item: PartialEq + HashableChar + Copy, 318 | { 319 | let maximum = self.maximum(len1, len2); 320 | if let Some(cutoff) = score_cutoff { 321 | if cutoff > maximum { 322 | return maximum; 323 | } 324 | 325 | if let Some(hint) = score_hint { 326 | score_hint = Some(hint.min(cutoff)); 327 | } 328 | } 329 | 330 | let cutoff_distance = score_cutoff.map(|x| maximum - x); 331 | let hint_distance = score_hint.map(|x| maximum - x); 332 | let dist = self._distance(s1, len1, s2, len2, cutoff_distance, hint_distance); 333 | maximum - dist 334 | } 335 | 336 | fn _normalized_distance( 337 | &self, 338 | s1: Iter1, 339 | len1: usize, 340 | s2: Iter2, 341 | len2: usize, 342 | score_cutoff: Option, 343 | score_hint: Option, 344 | ) -> f64 345 | where 346 | Iter1: DoubleEndedIterator + Clone, 347 | Iter2: DoubleEndedIterator + Clone, 348 | Iter1::Item: PartialEq + HashableChar + Copy, 349 | Iter2::Item: PartialEq + HashableChar + Copy, 350 | { 351 | let maximum = self.maximum(len1, len2); 352 | 353 | let cutoff_distance = score_cutoff.map(|x| maximum * x); 354 | let hint_distance = score_hint.map(|x| maximum * x); 355 | 356 | let dist = self._distance(s1, len1, s2, len2, cutoff_distance, hint_distance); 357 | if maximum > 0.0 { 358 | dist / maximum 359 | } else { 360 | 0.0 361 | } 362 | } 363 | 364 | fn _normalized_similarity( 365 | &self, 366 | s1: Iter1, 367 | len1: usize, 368 | s2: Iter2, 369 | len2: usize, 370 | score_cutoff: Option, 371 | score_hint: Option, 372 | ) -> f64 373 | where 374 | Iter1: DoubleEndedIterator + Clone, 375 | Iter2: DoubleEndedIterator + Clone, 376 | Iter1::Item: PartialEq + HashableChar + Copy, 377 | Iter2::Item: PartialEq + HashableChar + Copy, 378 | { 379 | let cutoff_score = score_cutoff.map(norm_sim_to_norm_dist); 380 | let hint_score = score_hint.map(norm_sim_to_norm_dist); 381 | 382 | let norm_dist = self._normalized_distance(s1, len1, s2, len2, cutoff_score, hint_score); 383 | 1.0 - norm_dist 384 | } 385 | } 386 | -------------------------------------------------------------------------------- /src/details/growing_hashmap.rs: -------------------------------------------------------------------------------- 1 | use crate::{Hash, HashableChar}; 2 | 3 | #[derive(Default, Clone)] 4 | struct GrowingHashmapMapElem { 5 | key: u64, 6 | value: ValueType, 7 | } 8 | 9 | /// specialized hashmap to store user provided types 10 | /// this implementation relies on a couple of base assumptions in order to simplify the implementation 11 | /// - the hashmap 
does not have an upper limit of included items 12 | /// - the default value for the `ValueType` can be used as a dummy value to indicate an empty cell 13 | /// - elements can't be removed 14 | /// - only allocates memory on first write access. 15 | /// This improves performance for hashmaps that are never written to 16 | pub struct GrowingHashmap { 17 | used: i32, 18 | fill: i32, 19 | mask: i32, 20 | map: Option>>, 21 | } 22 | 23 | impl Default for GrowingHashmap 24 | where 25 | ValueType: Default + Clone + Eq, 26 | { 27 | #[inline] 28 | fn default() -> Self { 29 | Self { 30 | used: 0, 31 | fill: 0, 32 | mask: -1, 33 | map: None, 34 | } 35 | } 36 | } 37 | 38 | impl GrowingHashmap 39 | where 40 | ValueType: Default + Clone + Eq + Copy, 41 | { 42 | #[allow(dead_code)] 43 | pub const fn size(&self) -> i32 { 44 | self.used 45 | } 46 | 47 | #[allow(dead_code)] 48 | pub const fn capacity(&self) -> i32 { 49 | self.mask + 1 50 | } 51 | 52 | #[allow(dead_code)] 53 | pub const fn empty(&self) -> bool { 54 | self.used == 0 55 | } 56 | 57 | pub fn get(&self, key: u64) -> ValueType { 58 | self.map 59 | .as_ref() 60 | .map_or_else(|| Default::default(), |map| map[self.lookup(key)].value) 61 | } 62 | 63 | pub fn get_mut(&mut self, key: u64) -> &mut ValueType { 64 | if self.map.is_none() { 65 | self.allocate(); 66 | } 67 | 68 | let mut i = self.lookup(key); 69 | if self 70 | .map 71 | .as_ref() 72 | .expect("map should have been created above")[i] 73 | .value 74 | == Default::default() 75 | { 76 | self.fill += 1; 77 | // resize when 2/3 full 78 | if self.fill * 3 >= (self.mask + 1) * 2 { 79 | self.grow((self.used + 1) * 2); 80 | i = self.lookup(key); 81 | } 82 | 83 | self.used += 1; 84 | } 85 | 86 | let elem = &mut self 87 | .map 88 | .as_mut() 89 | .expect("map should have been created above")[i]; 90 | elem.key = key; 91 | &mut elem.value 92 | } 93 | 94 | fn allocate(&mut self) { 95 | self.mask = 8 - 1; 96 | self.map = Some(vec![GrowingHashmapMapElem::default(); 8]); 97 | } 98 | 99 | /// lookup key inside the hashmap using a similar collision resolution 100 | /// strategy to `CPython` and `Ruby` 101 | fn lookup(&self, key: u64) -> usize { 102 | let hash = key; 103 | let mut i = hash as usize & self.mask as usize; 104 | 105 | let map = self 106 | .map 107 | .as_ref() 108 | .expect("callers have to ensure map is allocated"); 109 | 110 | if map[i].value == Default::default() || map[i].key == key { 111 | return i; 112 | } 113 | 114 | let mut perturb = key; 115 | loop { 116 | i = (i * 5 + perturb as usize + 1) & self.mask as usize; 117 | 118 | if map[i].value == Default::default() || map[i].key == key { 119 | return i; 120 | } 121 | 122 | perturb >>= 5; 123 | } 124 | } 125 | 126 | fn grow(&mut self, min_used: i32) { 127 | let mut new_size = self.mask + 1; 128 | while new_size <= min_used { 129 | new_size <<= 1; 130 | } 131 | 132 | self.fill = self.used; 133 | self.mask = new_size - 1; 134 | 135 | let old_map = std::mem::replace( 136 | self.map 137 | .as_mut() 138 | .expect("callers have to ensure map is allocated"), 139 | vec![GrowingHashmapMapElem::::default(); new_size as usize], 140 | ); 141 | 142 | for elem in old_map { 143 | if elem.value != Default::default() { 144 | let j = self.lookup(elem.key); 145 | let new_elem = &mut self.map.as_mut().expect("map created above")[j]; 146 | new_elem.key = elem.key; 147 | new_elem.value = elem.value; 148 | self.used -= 1; 149 | if self.used == 0 { 150 | break; 151 | } 152 | } 153 | } 154 | 155 | self.used = self.fill; 156 | } 157 | } 158 | 159 | pub struct 
HybridGrowingHashmap { 160 | // todo in theory we have a fixed keytype here and so we wouldn't need both 161 | // an unsigned and signed map. In Practice this probably doesn't matter all that much 162 | pub map_unsigned: GrowingHashmap, 163 | pub map_signed: GrowingHashmap, 164 | pub extended_ascii: [ValueType; 256], 165 | } 166 | 167 | impl HybridGrowingHashmap 168 | where 169 | ValueType: Default + Clone + Copy + Eq, 170 | { 171 | // right now this can't be used since rust fails to elide the memcpy 172 | // on return 173 | /*pub fn new() -> Self { 174 | HybridGrowingHashmap { 175 | map_unsigned: GrowingHashmap::default(), 176 | map_signed: GrowingHashmap::default(), 177 | extended_ascii: [Default::default(); 256], 178 | } 179 | }*/ 180 | 181 | pub fn get(&self, key: CharT) -> ValueType 182 | where 183 | CharT: HashableChar, 184 | { 185 | match key.hash_char() { 186 | Hash::SIGNED(value) => { 187 | if value < 0 { 188 | self.map_signed.get(u64::from_ne_bytes(value.to_ne_bytes())) 189 | } else if value <= 255 { 190 | let val_u8 = u8::try_from(value).expect("we check the bounds above"); 191 | self.extended_ascii[usize::from(val_u8)] 192 | } else { 193 | self.map_unsigned 194 | .get(u64::from_ne_bytes(value.to_ne_bytes())) 195 | } 196 | } 197 | Hash::UNSIGNED(value) => { 198 | if value <= 255 { 199 | let val_u8 = u8::try_from(value).expect("we check the bounds above"); 200 | self.extended_ascii[usize::from(val_u8)] 201 | } else { 202 | self.map_unsigned.get(value) 203 | } 204 | } 205 | } 206 | } 207 | 208 | pub fn get_mut(&mut self, key: CharT) -> &mut ValueType 209 | where 210 | CharT: HashableChar, 211 | { 212 | match key.hash_char() { 213 | Hash::SIGNED(value) => { 214 | if value < 0 { 215 | self.map_signed 216 | .get_mut(u64::from_ne_bytes(value.to_ne_bytes())) 217 | } else if value <= 255 { 218 | let val_u8 = u8::try_from(value).expect("we check the bounds above"); 219 | &mut self.extended_ascii[usize::from(val_u8)] 220 | } else { 221 | self.map_unsigned 222 | .get_mut(u64::from_ne_bytes(value.to_ne_bytes())) 223 | } 224 | } 225 | Hash::UNSIGNED(value) => { 226 | if value <= 255 { 227 | let val_u8 = u8::try_from(value).expect("we check the bounds above"); 228 | &mut self.extended_ascii[usize::from(val_u8)] 229 | } else { 230 | self.map_unsigned.get_mut(value) 231 | } 232 | } 233 | } 234 | } 235 | } 236 | -------------------------------------------------------------------------------- /src/details/intrinsics.rs: -------------------------------------------------------------------------------- 1 | pub const fn ceil_div_usize(a: usize, divisor: usize) -> usize { 2 | a / divisor + (a % divisor != 0) as usize 3 | } 4 | 5 | /// shift right without undefined behavior for shifts > bit width 6 | pub const fn shr64(a: u64, shift: usize) -> u64 { 7 | if shift < 64 { 8 | a >> shift 9 | } else { 10 | 0 11 | } 12 | } 13 | 14 | /// shift left without undefined behavior for shifts > bit width 15 | #[allow(dead_code)] 16 | pub const fn shl64(a: u64, shift: usize) -> u64 { 17 | if shift < 64 { 18 | a << shift 19 | } else { 20 | 0 21 | } 22 | } 23 | 24 | // this is still a nightly only api. 
Can be removed if it becomes stable 25 | pub const fn carrying_add(lhs: u64, rhs: u64, carry: bool) -> (u64, bool) { 26 | let (a, b) = lhs.overflowing_add(rhs); 27 | let (c, d) = a.overflowing_add(carry as u64); 28 | (c, b | d) 29 | } 30 | 31 | pub const fn bit_mask_lsb_u64(n: usize) -> u64 { 32 | let mut mask = !0_u64; 33 | if n < 64 { 34 | mask = mask.wrapping_add(1_u64 << n); 35 | } 36 | mask 37 | } 38 | pub const fn blsi_u64(v: u64) -> u64 { 39 | v & v.wrapping_neg() 40 | } 41 | 42 | #[allow(dead_code)] 43 | pub const fn blsr_u64(v: u64) -> u64 { 44 | v & v.wrapping_sub(1) 45 | } 46 | -------------------------------------------------------------------------------- /src/details/matrix.rs: -------------------------------------------------------------------------------- 1 | use std::ops::{BitAnd, Shl}; 2 | 3 | #[derive(Clone)] 4 | pub struct BitMatrix { 5 | rows: usize, 6 | cols: usize, 7 | matrix: Vec, 8 | } 9 | 10 | impl BitMatrix 11 | where 12 | T: Clone, 13 | { 14 | pub fn new(rows: usize, cols: usize, val: T) -> Self { 15 | Self { 16 | rows, 17 | cols, 18 | matrix: vec![val; rows * cols], 19 | } 20 | } 21 | 22 | #[allow(dead_code)] 23 | pub const fn rows(&self) -> usize { 24 | self.rows 25 | } 26 | 27 | #[allow(dead_code)] 28 | pub const fn cols(&self) -> usize { 29 | self.cols 30 | } 31 | 32 | pub fn get(&self, row: usize, col: usize) -> &T { 33 | debug_assert!(row < self.rows); 34 | debug_assert!(col < self.cols); 35 | &self.matrix[row * self.cols + col] 36 | } 37 | 38 | pub fn get_mut(&mut self, row: usize, col: usize) -> &mut T { 39 | debug_assert!(row < self.rows); 40 | debug_assert!(col < self.cols); 41 | &mut self.matrix[row * self.cols + col] 42 | } 43 | } 44 | 45 | pub struct ShiftedBitMatrix { 46 | matrix: BitMatrix, 47 | offsets: Vec, 48 | } 49 | 50 | impl ShiftedBitMatrix 51 | where 52 | T: Copy + From + Shl + BitAnd + PartialEq, 53 | { 54 | pub fn new(rows: usize, cols: usize, val: T) -> Self { 55 | Self { 56 | matrix: BitMatrix::::new(rows, cols, val), 57 | offsets: vec![0; rows], 58 | } 59 | } 60 | 61 | #[allow(dead_code)] 62 | pub fn test_bit(&self, row: usize, mut col: usize, default: bool) -> bool { 63 | let offset = self.offsets[row]; 64 | 65 | if offset < 0 { 66 | col += (-offset) as usize; 67 | } else if col >= offset as usize { 68 | col -= offset as usize; 69 | } 70 | // bit on the left of the band 71 | else { 72 | return default; 73 | } 74 | 75 | let word_size = std::mem::size_of::() * 8; 76 | let col_word = col / word_size; 77 | let col_mask = T::from(1) << (col % word_size); 78 | 79 | (*self.matrix.get(row, col_word) & col_mask) != T::from(0) 80 | } 81 | 82 | #[allow(dead_code)] 83 | pub fn get(&self, row: usize, col: usize) -> &T { 84 | self.matrix.get(row, col) 85 | } 86 | 87 | pub fn get_mut(&mut self, row: usize, col: usize) -> &mut T { 88 | self.matrix.get_mut(row, col) 89 | } 90 | 91 | pub fn set_offset(&mut self, row: usize, offset: isize) { 92 | self.offsets[row] = offset; 93 | } 94 | } 95 | 96 | impl Default for ShiftedBitMatrix 97 | where 98 | T: Copy + From + Shl + BitAnd + PartialEq, 99 | { 100 | fn default() -> Self { 101 | Self::new(0, 0, T::from(0)) 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/details/pattern_match_vector.rs: -------------------------------------------------------------------------------- 1 | use crate::details::intrinsics::ceil_div_usize; 2 | use crate::details::matrix::BitMatrix; 3 | use crate::{Hash, HashableChar}; 4 | 5 | #[derive(Clone, Copy, Default)] 6 | 
struct BitvectorHashmapMapElem { 7 | key: u64, 8 | value: u64, 9 | } 10 | 11 | /// specialized hashmap to store bitvectors 12 | /// this implementation relies on a couple of base assumptions in order to simplify the implementation 13 | /// - the hashmap includes at most 64 different items 14 | /// - since bitvectors are only in use when at least one bit is set, 0 can be used to indicate an unused element 15 | /// - elements are never explicitly removed. When changing a sliding window over a string, shifting the corresponding 16 | /// bits would eventually be 0 -> removed the element 17 | /// - works with u64 keys. The caller has to ensure these have no collisions when using e.g. a mixture of u64 and i64 elements 18 | /// this can be done e.g. by using two hashmaps one for values < 0 and one for values >= 0 19 | #[derive(Clone)] 20 | pub struct BitvectorHashmap { 21 | map: [BitvectorHashmapMapElem; 128], 22 | } 23 | 24 | impl Default for BitvectorHashmap { 25 | #[inline] 26 | fn default() -> Self { 27 | Self { 28 | map: [BitvectorHashmapMapElem::default(); 128], 29 | } 30 | } 31 | } 32 | 33 | impl BitvectorHashmap { 34 | pub const fn get(&self, key: u64) -> u64 { 35 | self.map[self.lookup(key)].value 36 | } 37 | 38 | pub fn get_mut(&mut self, key: u64) -> &mut u64 { 39 | let i = self.lookup(key); 40 | let elem = &mut self.map[i]; 41 | elem.key = key; 42 | &mut elem.value 43 | } 44 | 45 | /// lookup key inside the hashmap using a similar collision resolution 46 | /// strategy to `CPython` and `Ruby` 47 | const fn lookup(&self, key: u64) -> usize { 48 | let mut i = (key % 128) as usize; 49 | 50 | if self.map[i].value == 0 || self.map[i].key == key { 51 | return i; 52 | } 53 | 54 | let mut perturb = key; 55 | loop { 56 | i = (i * 5 + perturb as usize + 1) % 128; 57 | 58 | if self.map[i].value == 0 || self.map[i].key == key { 59 | return i; 60 | } 61 | 62 | perturb >>= 5; 63 | } 64 | } 65 | } 66 | 67 | pub struct PatternMatchVector { 68 | pub extended_ascii: [u64; 256], 69 | pub map_unsigned: Option, 70 | pub map_signed: Option, 71 | } 72 | 73 | pub trait BitVectorInterface { 74 | fn get(&self, block: usize, key: CharT) -> u64 75 | where 76 | CharT: HashableChar; 77 | 78 | fn size(&self) -> usize; 79 | } 80 | 81 | impl Default for PatternMatchVector { 82 | fn default() -> Self { 83 | Self { 84 | map_unsigned: None, 85 | map_signed: None, 86 | extended_ascii: [0; 256], 87 | } 88 | } 89 | } 90 | 91 | impl PatternMatchVector { 92 | pub fn insert(&mut self, s1: Iter1) 93 | where 94 | Iter1: Iterator, 95 | CharT: HashableChar, 96 | { 97 | let mut mask: u64 = 1; 98 | for ch in s1 { 99 | self.insert_mask(ch, mask); 100 | mask <<= 1; 101 | } 102 | } 103 | 104 | fn insert_mask(&mut self, key: CharT, mask: u64) 105 | where 106 | CharT: HashableChar, 107 | { 108 | match key.hash_char() { 109 | Hash::SIGNED(value) => { 110 | if value < 0 { 111 | if self.map_signed.is_none() { 112 | self.map_signed = Some(BitvectorHashmap::default()); 113 | } 114 | let item = self 115 | .map_signed 116 | .as_mut() 117 | .expect("map should have been created above") 118 | .get_mut(u64::from_ne_bytes(value.to_ne_bytes())); 119 | *item |= mask; 120 | } else if value <= 255 { 121 | let val_u8 = u8::try_from(value).expect("we check the bounds above"); 122 | let item = &mut self.extended_ascii[usize::from(val_u8)]; 123 | *item |= mask; 124 | } else { 125 | if self.map_unsigned.is_none() { 126 | self.map_unsigned = Some(BitvectorHashmap::default()); 127 | } 128 | let item = self 129 | .map_unsigned 130 | .as_mut() 131 | 
.expect("map should have been created above") 132 | .get_mut(u64::from_ne_bytes(value.to_ne_bytes())); 133 | *item |= mask; 134 | } 135 | } 136 | Hash::UNSIGNED(value) => { 137 | if value <= 255 { 138 | let val_u8 = u8::try_from(value).expect("we check the bounds above"); 139 | let item = &mut self.extended_ascii[usize::from(val_u8)]; 140 | *item |= mask; 141 | } else { 142 | if self.map_unsigned.is_none() { 143 | self.map_unsigned = Some(BitvectorHashmap::default()); 144 | } 145 | let item = self 146 | .map_unsigned 147 | .as_mut() 148 | .expect("map should have been created above") 149 | .get_mut(value); 150 | *item |= mask; 151 | } 152 | } 153 | } 154 | } 155 | } 156 | 157 | impl BitVectorInterface for PatternMatchVector { 158 | fn get(&self, block: usize, key: CharT) -> u64 159 | where 160 | CharT: HashableChar, 161 | { 162 | debug_assert!(block == 0); 163 | match key.hash_char() { 164 | Hash::SIGNED(value) => { 165 | if value < 0 { 166 | self.map_signed 167 | .as_ref() 168 | .map_or(0, |map| map.get(u64::from_ne_bytes(value.to_ne_bytes()))) 169 | } else if value <= 255 { 170 | let val_u8 = u8::try_from(value).expect("we check the bounds above"); 171 | self.extended_ascii[usize::from(val_u8)] 172 | } else { 173 | self.map_unsigned 174 | .as_ref() 175 | .map_or(0, |map| map.get(u64::from_ne_bytes(value.to_ne_bytes()))) 176 | } 177 | } 178 | Hash::UNSIGNED(value) => { 179 | if value <= 255 { 180 | let val_u8 = u8::try_from(value).expect("we check the bounds above"); 181 | self.extended_ascii[usize::from(val_u8)] 182 | } else { 183 | self.map_unsigned.as_ref().map_or(0, |map| map.get(value)) 184 | } 185 | } 186 | } 187 | } 188 | 189 | fn size(&self) -> usize { 190 | 1 191 | } 192 | } 193 | 194 | #[derive(Clone)] 195 | pub struct BlockPatternMatchVector { 196 | pub block_count: usize, 197 | pub map_unsigned: Option>, 198 | pub map_signed: Option>, 199 | pub extended_ascii: BitMatrix, 200 | } 201 | 202 | impl BlockPatternMatchVector { 203 | pub fn new(str_len: usize) -> Self { 204 | let block_count = ceil_div_usize(str_len, 64); 205 | Self { 206 | block_count, 207 | map_unsigned: None, 208 | map_signed: None, 209 | extended_ascii: BitMatrix::::new(256, block_count, 0), 210 | } 211 | } 212 | 213 | pub fn insert(&mut self, s1: Iter1) 214 | where 215 | Iter1: Iterator, 216 | CharT: HashableChar, 217 | { 218 | let mut mask: u64 = 1; 219 | for (i, ch) in s1.enumerate() { 220 | let block = i / 64; 221 | self.insert_mask(block, ch, mask); 222 | mask = mask.rotate_left(1); 223 | } 224 | } 225 | 226 | fn insert_mask(&mut self, block: usize, key: CharT, mask: u64) 227 | where 228 | CharT: HashableChar, 229 | { 230 | debug_assert!(block < self.size()); 231 | 232 | match key.hash_char() { 233 | Hash::SIGNED(value) => { 234 | if value < 0 { 235 | if self.map_signed.is_none() { 236 | self.map_signed = Some(vec![BitvectorHashmap::default(); self.block_count]); 237 | } 238 | let item = self 239 | .map_signed 240 | .as_mut() 241 | .expect("map should have been created above")[block] 242 | .get_mut(u64::from_ne_bytes(value.to_ne_bytes())); 243 | *item |= mask; 244 | } else if value <= 255 { 245 | let val_u8 = u8::try_from(value).expect("we check the bounds above"); 246 | let item = self.extended_ascii.get_mut(val_u8.into(), block); 247 | *item |= mask; 248 | } else { 249 | if self.map_unsigned.is_none() { 250 | self.map_unsigned = 251 | Some(vec![BitvectorHashmap::default(); self.block_count]); 252 | } 253 | let item = self 254 | .map_unsigned 255 | .as_mut() 256 | .expect("map should have been created 
above")[block] 257 | .get_mut(u64::from_ne_bytes(value.to_ne_bytes())); 258 | *item |= mask; 259 | } 260 | } 261 | Hash::UNSIGNED(value) => { 262 | if value <= 255 { 263 | let val_u8 = u8::try_from(value).expect("we check the bounds above"); 264 | let item = self.extended_ascii.get_mut(val_u8.into(), block); 265 | *item |= mask; 266 | } else { 267 | if self.map_unsigned.is_none() { 268 | self.map_unsigned = 269 | Some(vec![BitvectorHashmap::default(); self.block_count]); 270 | } 271 | let item = self 272 | .map_unsigned 273 | .as_mut() 274 | .expect("map should have been created above")[block] 275 | .get_mut(value); 276 | *item |= mask; 277 | } 278 | } 279 | } 280 | } 281 | } 282 | 283 | impl BitVectorInterface for BlockPatternMatchVector { 284 | fn get(&self, block: usize, key: CharT) -> u64 285 | where 286 | CharT: HashableChar, 287 | { 288 | debug_assert!(block < self.size()); 289 | 290 | match key.hash_char() { 291 | Hash::SIGNED(value) => { 292 | if value < 0 { 293 | self.map_signed.as_ref().map_or(0, |map| { 294 | map[block].get(u64::from_ne_bytes(value.to_ne_bytes())) 295 | }) 296 | } else if value <= 255 { 297 | let val_u8 = u8::try_from(value).expect("we check the bounds above"); 298 | *self.extended_ascii.get(val_u8.into(), block) 299 | } else { 300 | self.map_unsigned.as_ref().map_or(0, |map| { 301 | map[block].get(u64::from_ne_bytes(value.to_ne_bytes())) 302 | }) 303 | } 304 | } 305 | Hash::UNSIGNED(value) => { 306 | if value <= 255 { 307 | let val_u8 = u8::try_from(value).expect("we check the bounds above"); 308 | *self.extended_ascii.get(val_u8.into(), block) 309 | } else { 310 | self.map_unsigned 311 | .as_ref() 312 | .map_or(0, |map| map[block].get(value)) 313 | } 314 | } 315 | } 316 | } 317 | 318 | fn size(&self) -> usize { 319 | self.block_count 320 | } 321 | } 322 | -------------------------------------------------------------------------------- /src/distance.rs: -------------------------------------------------------------------------------- 1 | pub mod damerau_levenshtein; 2 | pub mod hamming; 3 | pub mod indel; 4 | pub mod jaro; 5 | pub mod jaro_winkler; 6 | pub mod lcs_seq; 7 | pub mod levenshtein; 8 | pub mod osa; 9 | pub mod postfix; 10 | pub mod prefix; 11 | 12 | #[cfg(test)] 13 | pub(crate) mod example; 14 | -------------------------------------------------------------------------------- /src/distance/example.rs: -------------------------------------------------------------------------------- 1 | pub mod ocr; 2 | -------------------------------------------------------------------------------- /src/distance/hamming.rs: -------------------------------------------------------------------------------- 1 | //! Hamming distance 2 | //! 3 | //! The Hamming distance measures the similarity of two sequences of equal length. 4 | //! Specifically, it counts the minimum number of substitutions required to 5 | //! transform one string into the other. 6 | //! 7 | //! While regularly the Hamming distance only works with texts of equal length, 8 | //! this implementation provides an addition argument `pad` to decide whether texts 9 | //! of unequal length should be padded or return an error. 10 | //! 
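//!
//! A short illustrative sketch of the `pad` behaviour, mirroring the unit
//! tests in this module:
//!
//! ```
//! use rapidfuzz::distance::hamming;
//!
//! // Texts of unequal length are an error by default ...
//! assert_eq!(
//!     Err(hamming::Error::DifferentLengthArgs),
//!     hamming::distance("ham".chars(), "hamming".chars())
//! );
//!
//! // ... but with padding enabled the missing characters simply count as
//! // substitutions.
//! assert_eq!(
//!     4,
//!     hamming::distance_with_args(
//!         "ham".chars(),
//!         "hamming".chars(),
//!         &hamming::Args::default().pad(true)
//!     )
//! );
//! ```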
11 | 12 | use crate::common::{DistanceCutoff, NoScoreCutoff, SimilarityCutoff, WithScoreCutoff}; 13 | use crate::details::distance::MetricUsize; 14 | use crate::HashableChar; 15 | 16 | use std::error; 17 | use std::fmt::{self, Debug, Display, Formatter}; 18 | 19 | #[derive(Default, Copy, Clone)] 20 | pub struct Padding(bool); 21 | #[derive(Default, Copy, Clone)] 22 | pub struct NoPadding; 23 | 24 | #[must_use] 25 | #[derive(Copy, Clone, Debug)] 26 | pub struct Args { 27 | score_cutoff: CutoffType, 28 | score_hint: Option, 29 | pad: PaddingType, 30 | } 31 | 32 | impl Default for Args { 33 | fn default() -> Args { 34 | Args { 35 | score_cutoff: NoScoreCutoff, 36 | score_hint: None, 37 | pad: NoPadding, 38 | } 39 | } 40 | } 41 | 42 | pub trait PaddingTrait 43 | where 44 | T: Copy, 45 | { 46 | type Output: Copy + PartialEq + Debug; 47 | 48 | fn pad(&self) -> bool; 49 | fn error(&self) -> Self::Output; 50 | fn score(&self, raw: T) -> Self::Output; 51 | } 52 | 53 | impl PaddingTrait for NoPadding 54 | where 55 | T: Copy + PartialEq + Debug, 56 | { 57 | type Output = Result; 58 | 59 | fn pad(&self) -> bool { 60 | false 61 | } 62 | 63 | fn error(&self) -> Self::Output { 64 | Err(Error::DifferentLengthArgs) 65 | } 66 | 67 | fn score(&self, raw: T) -> Self::Output { 68 | Ok(raw) 69 | } 70 | } 71 | 72 | impl PaddingTrait for Padding 73 | where 74 | T: Copy + PartialOrd + Debug + Default, 75 | { 76 | type Output = T; 77 | 78 | fn pad(&self) -> bool { 79 | self.0 80 | } 81 | 82 | // will not occur 83 | fn error(&self) -> Self::Output { 84 | T::default() 85 | } 86 | 87 | fn score(&self, raw: T) -> Self::Output { 88 | raw 89 | } 90 | } 91 | 92 | impl Args 93 | where 94 | ResultType: Copy, 95 | { 96 | pub fn score_hint(mut self, score_hint: ResultType) -> Self { 97 | self.score_hint = Some(score_hint); 98 | self 99 | } 100 | 101 | pub fn score_cutoff( 102 | self, 103 | score_cutoff: ResultType, 104 | ) -> Args, PaddingType> { 105 | Args { 106 | score_hint: self.score_hint, 107 | score_cutoff: WithScoreCutoff(score_cutoff), 108 | pad: self.pad, 109 | } 110 | } 111 | 112 | pub fn pad(self, pad: bool) -> Args { 113 | Args { 114 | score_hint: self.score_hint, 115 | score_cutoff: self.score_cutoff, 116 | pad: Padding(pad), 117 | } 118 | } 119 | } 120 | 121 | #[derive(Debug, PartialEq, Eq, Copy, Clone)] 122 | pub enum Error { 123 | DifferentLengthArgs, 124 | } 125 | 126 | impl Display for Error { 127 | fn fmt(&self, fmt: &mut Formatter) -> Result<(), fmt::Error> { 128 | let text = match self { 129 | Self::DifferentLengthArgs => "Differing length arguments provided", 130 | }; 131 | 132 | write!(fmt, "{text}") 133 | } 134 | } 135 | 136 | impl error::Error for Error {} 137 | 138 | fn distance_impl(mut s1: Iter1, mut s2: Iter2) -> usize 139 | where 140 | Iter1: Iterator, 141 | Iter2: Iterator, 142 | Iter1::Item: PartialEq + HashableChar, 143 | Iter2::Item: PartialEq + HashableChar, 144 | { 145 | let mut dist = 0; 146 | loop { 147 | match (s1.next(), s2.next()) { 148 | (Some(ch1), Some(ch2)) => { 149 | if ch1 != ch2 { 150 | dist += 1; 151 | } 152 | } 153 | (None, None) => { 154 | return dist; 155 | } 156 | _ => { 157 | dist += 1; 158 | } 159 | } 160 | } 161 | } 162 | 163 | struct IndividualComparator; 164 | 165 | impl MetricUsize for IndividualComparator { 166 | fn maximum(&self, len1: usize, len2: usize) -> usize { 167 | len1.max(len2) 168 | } 169 | 170 | fn _distance( 171 | &self, 172 | s1: Iter1, 173 | _len1: usize, 174 | s2: Iter2, 175 | _len2: usize, 176 | _score_cutoff: Option, 177 | _score_hint: Option, 178 | 
) -> usize 179 | where 180 | Iter1: Iterator, 181 | Iter2: Iterator, 182 | Iter1::Item: PartialEq + HashableChar, 183 | Iter2::Item: PartialEq + HashableChar, 184 | { 185 | distance_impl(s1, s2) 186 | } 187 | } 188 | 189 | /// Hamming distance 190 | /// 191 | /// Calculates the Hamming distance. 192 | /// 193 | /// # Examples 194 | /// 195 | /// ``` 196 | /// use rapidfuzz::distance::hamming; 197 | /// 198 | /// assert_eq!(Ok(1), hamming::distance("hamming".chars(), "humming".chars())); 199 | /// ``` 200 | pub fn distance(s1: Iter1, s2: Iter2) -> Result 201 | where 202 | Iter1: IntoIterator, 203 | Iter1::IntoIter: DoubleEndedIterator + Clone, 204 | Iter2: IntoIterator, 205 | Iter2::IntoIter: DoubleEndedIterator + Clone, 206 | Iter1::Item: PartialEq + HashableChar + Copy, 207 | Iter2::Item: PartialEq + HashableChar + Copy, 208 | { 209 | distance_with_args(s1, s2, &Args::default()) 210 | } 211 | 212 | pub fn distance_with_args( 213 | s1: Iter1, 214 | s2: Iter2, 215 | args: &Args, 216 | ) -> PaddingType::Output 217 | where 218 | Iter1: IntoIterator, 219 | Iter1::IntoIter: DoubleEndedIterator + Clone, 220 | Iter2: IntoIterator, 221 | Iter2::IntoIter: DoubleEndedIterator + Clone, 222 | Iter1::Item: PartialEq + HashableChar + Copy, 223 | Iter2::Item: PartialEq + HashableChar + Copy, 224 | CutoffType: DistanceCutoff, 225 | PaddingType: PaddingTrait, 226 | { 227 | let s1_iter = s1.into_iter(); 228 | let s2_iter = s2.into_iter(); 229 | let len1 = s1_iter.clone().count(); 230 | let len2 = s2_iter.clone().count(); 231 | 232 | if !args.pad.pad() && len1 != len2 { 233 | return args.pad.error(); 234 | } 235 | 236 | args.pad 237 | .score(args.score_cutoff.score(IndividualComparator {}._distance( 238 | s1_iter, 239 | len1, 240 | s2_iter, 241 | len2, 242 | args.score_cutoff.cutoff(), 243 | args.score_hint, 244 | ))) 245 | } 246 | 247 | /// Hamming similarity in the range [0, max] 248 | /// 249 | /// This is calculated as `max(len1, len2) - `[`distance`]. 250 | /// 251 | pub fn similarity(s1: Iter1, s2: Iter2) -> Result 252 | where 253 | Iter1: IntoIterator, 254 | Iter1::IntoIter: DoubleEndedIterator + Clone, 255 | Iter2: IntoIterator, 256 | Iter2::IntoIter: DoubleEndedIterator + Clone, 257 | Iter1::Item: PartialEq + HashableChar + Copy, 258 | Iter2::Item: PartialEq + HashableChar + Copy, 259 | { 260 | similarity_with_args(s1, s2, &Args::default()) 261 | } 262 | 263 | pub fn similarity_with_args( 264 | s1: Iter1, 265 | s2: Iter2, 266 | args: &Args, 267 | ) -> PaddingType::Output 268 | where 269 | Iter1: IntoIterator, 270 | Iter1::IntoIter: DoubleEndedIterator + Clone, 271 | Iter2: IntoIterator, 272 | Iter2::IntoIter: DoubleEndedIterator + Clone, 273 | Iter1::Item: PartialEq + HashableChar + Copy, 274 | Iter2::Item: PartialEq + HashableChar + Copy, 275 | CutoffType: SimilarityCutoff, 276 | PaddingType: PaddingTrait, 277 | { 278 | let s1_iter = s1.into_iter(); 279 | let s2_iter = s2.into_iter(); 280 | let len1 = s1_iter.clone().count(); 281 | let len2 = s2_iter.clone().count(); 282 | 283 | if !args.pad.pad() && len1 != len2 { 284 | return args.pad.error(); 285 | } 286 | 287 | args.pad 288 | .score(args.score_cutoff.score(IndividualComparator {}._similarity( 289 | s1_iter, 290 | len1, 291 | s2_iter, 292 | len2, 293 | args.score_cutoff.cutoff(), 294 | args.score_hint, 295 | ))) 296 | } 297 | 298 | /// Normalized Hamming distance in the range [1.0, 0.0] 299 | /// 300 | /// This is calculated as [`distance`]` / max(len1, len2)`. 
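///
/// # Examples
///
/// A small illustrative sketch, derived from the formula above and the
/// example in [`distance`]: "hamming" and "humming" differ in one of seven
/// positions.
///
/// ```
/// use rapidfuzz::distance::hamming;
///
/// let norm = hamming::normalized_distance("hamming".chars(), "humming".chars()).unwrap();
/// assert!((norm - 1.0 / 7.0).abs() < 1e-9);
/// ```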
301 | /// 302 | pub fn normalized_distance(s1: Iter1, s2: Iter2) -> Result 303 | where 304 | Iter1: IntoIterator, 305 | Iter1::IntoIter: DoubleEndedIterator + Clone, 306 | Iter2: IntoIterator, 307 | Iter2::IntoIter: DoubleEndedIterator + Clone, 308 | Iter1::Item: PartialEq + HashableChar + Copy, 309 | Iter2::Item: PartialEq + HashableChar + Copy, 310 | { 311 | normalized_distance_with_args(s1, s2, &Args::default()) 312 | } 313 | 314 | pub fn normalized_distance_with_args( 315 | s1: Iter1, 316 | s2: Iter2, 317 | args: &Args, 318 | ) -> PaddingType::Output 319 | where 320 | Iter1: IntoIterator, 321 | Iter1::IntoIter: DoubleEndedIterator + Clone, 322 | Iter2: IntoIterator, 323 | Iter2::IntoIter: DoubleEndedIterator + Clone, 324 | Iter1::Item: PartialEq + HashableChar + Copy, 325 | Iter2::Item: PartialEq + HashableChar + Copy, 326 | CutoffType: DistanceCutoff, 327 | PaddingType: PaddingTrait, 328 | { 329 | let s1_iter = s1.into_iter(); 330 | let s2_iter = s2.into_iter(); 331 | let len1 = s1_iter.clone().count(); 332 | let len2 = s2_iter.clone().count(); 333 | 334 | if !args.pad.pad() && len1 != len2 { 335 | return args.pad.error(); 336 | } 337 | 338 | args.pad.score( 339 | args.score_cutoff 340 | .score(IndividualComparator {}._normalized_distance( 341 | s1_iter, 342 | len1, 343 | s2_iter, 344 | len2, 345 | args.score_cutoff.cutoff(), 346 | args.score_hint, 347 | )), 348 | ) 349 | } 350 | 351 | /// Normalized Hamming similarity in the range [0.0, 1.0] 352 | /// 353 | /// This is calculated as `1.0 - `[`normalized_distance`]. 354 | /// 355 | pub fn normalized_similarity(s1: Iter1, s2: Iter2) -> Result 356 | where 357 | Iter1: IntoIterator, 358 | Iter1::IntoIter: DoubleEndedIterator + Clone, 359 | Iter2: IntoIterator, 360 | Iter2::IntoIter: DoubleEndedIterator + Clone, 361 | Iter1::Item: PartialEq + HashableChar + Copy, 362 | Iter2::Item: PartialEq + HashableChar + Copy, 363 | { 364 | normalized_similarity_with_args(s1, s2, &Args::default()) 365 | } 366 | 367 | pub fn normalized_similarity_with_args( 368 | s1: Iter1, 369 | s2: Iter2, 370 | args: &Args, 371 | ) -> PaddingType::Output 372 | where 373 | Iter1: IntoIterator, 374 | Iter1::IntoIter: DoubleEndedIterator + Clone, 375 | Iter2: IntoIterator, 376 | Iter2::IntoIter: DoubleEndedIterator + Clone, 377 | Iter1::Item: PartialEq + HashableChar + Copy, 378 | Iter2::Item: PartialEq + HashableChar + Copy, 379 | CutoffType: SimilarityCutoff, 380 | PaddingType: PaddingTrait, 381 | { 382 | let s1_iter = s1.into_iter(); 383 | let s2_iter = s2.into_iter(); 384 | let len1 = s1_iter.clone().count(); 385 | let len2 = s2_iter.clone().count(); 386 | 387 | if !args.pad.pad() && len1 != len2 { 388 | return args.pad.error(); 389 | } 390 | 391 | args.pad.score( 392 | args.score_cutoff 393 | .score(IndividualComparator {}._normalized_similarity( 394 | s1_iter, 395 | len1, 396 | s2_iter, 397 | len2, 398 | args.score_cutoff.cutoff(), 399 | args.score_hint, 400 | )), 401 | ) 402 | } 403 | 404 | /// `One x Many` comparisons using the Hamming distance 405 | /// 406 | /// # Examples 407 | /// 408 | /// ``` 409 | /// use rapidfuzz::distance::hamming; 410 | /// 411 | /// let scorer = hamming::BatchComparator::new("hamming".chars()); 412 | /// assert_eq!(Ok(1), scorer.distance("humming".chars())); 413 | /// ``` 414 | #[derive(Clone)] 415 | pub struct BatchComparator { 416 | s1: Vec, 417 | } 418 | 419 | impl BatchComparator 420 | where 421 | Elem1: HashableChar + Clone, 422 | { 423 | pub fn new(s1: Iter1) -> Self 424 | where 425 | Iter1: IntoIterator, 426 | { 427 | Self { 
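// Collect the first sequence once so it can be reused across many comparisons.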
428 | s1: s1.into_iter().collect(), 429 | } 430 | } 431 | 432 | /// Distance calculated similar to [`distance`] 433 | pub fn distance(&self, s2: Iter2) -> Result 434 | where 435 | Iter2: IntoIterator, 436 | Iter2::IntoIter: DoubleEndedIterator + Clone, 437 | Elem1: PartialEq + HashableChar + Copy, 438 | Iter2::Item: PartialEq + HashableChar + Copy, 439 | { 440 | distance(self.s1.iter().copied(), s2) 441 | } 442 | 443 | pub fn distance_with_args( 444 | &self, 445 | s2: Iter2, 446 | args: &Args, 447 | ) -> PaddingType::Output 448 | where 449 | Iter2: IntoIterator, 450 | Iter2::IntoIter: DoubleEndedIterator + Clone, 451 | Elem1: PartialEq + HashableChar + Copy, 452 | Iter2::Item: PartialEq + HashableChar + Copy, 453 | CutoffType: DistanceCutoff, 454 | PaddingType: PaddingTrait, 455 | { 456 | distance_with_args(self.s1.iter().copied(), s2, args) 457 | } 458 | 459 | /// Similarity calculated similar to [`similarity`] 460 | pub fn similarity(&self, s2: Iter2) -> Result 461 | where 462 | Iter2: IntoIterator, 463 | Iter2::IntoIter: DoubleEndedIterator + Clone, 464 | Elem1: PartialEq + HashableChar + Copy, 465 | Iter2::Item: PartialEq + HashableChar + Copy, 466 | { 467 | similarity(self.s1.iter().copied(), s2) 468 | } 469 | 470 | pub fn similarity_with_args( 471 | &self, 472 | s2: Iter2, 473 | args: &Args, 474 | ) -> PaddingType::Output 475 | where 476 | Iter2: IntoIterator, 477 | Iter2::IntoIter: DoubleEndedIterator + Clone, 478 | Elem1: PartialEq + HashableChar + Copy, 479 | Iter2::Item: PartialEq + HashableChar + Copy, 480 | CutoffType: SimilarityCutoff, 481 | PaddingType: PaddingTrait, 482 | { 483 | similarity_with_args(self.s1.iter().copied(), s2, args) 484 | } 485 | 486 | /// Normalized distance calculated similar to [`normalized_distance`] 487 | pub fn normalized_distance(&self, s2: Iter2) -> Result 488 | where 489 | Iter2: IntoIterator, 490 | Iter2::IntoIter: DoubleEndedIterator + Clone, 491 | Elem1: PartialEq + HashableChar + Copy, 492 | Iter2::Item: PartialEq + HashableChar + Copy, 493 | { 494 | normalized_distance(self.s1.iter().copied(), s2) 495 | } 496 | 497 | pub fn normalized_distance_with_args( 498 | &self, 499 | s2: Iter2, 500 | args: &Args, 501 | ) -> PaddingType::Output 502 | where 503 | Iter2: IntoIterator, 504 | Iter2::IntoIter: DoubleEndedIterator + Clone, 505 | Elem1: PartialEq + HashableChar + Copy, 506 | Iter2::Item: PartialEq + HashableChar + Copy, 507 | CutoffType: DistanceCutoff, 508 | PaddingType: PaddingTrait, 509 | { 510 | normalized_distance_with_args(self.s1.iter().copied(), s2, args) 511 | } 512 | 513 | /// Normalized similarity calculated similar to [`normalized_similarity`] 514 | pub fn normalized_similarity(&self, s2: Iter2) -> Result 515 | where 516 | Iter2: IntoIterator, 517 | Iter2::IntoIter: DoubleEndedIterator + Clone, 518 | Elem1: PartialEq + HashableChar + Copy, 519 | Iter2::Item: PartialEq + HashableChar + Copy, 520 | { 521 | normalized_similarity(self.s1.iter().copied(), s2) 522 | } 523 | 524 | pub fn normalized_similarity_with_args( 525 | &self, 526 | s2: Iter2, 527 | args: &Args, 528 | ) -> PaddingType::Output 529 | where 530 | Iter2: IntoIterator, 531 | Iter2::IntoIter: DoubleEndedIterator + Clone, 532 | Elem1: PartialEq + HashableChar + Copy, 533 | Iter2::Item: PartialEq + HashableChar + Copy, 534 | CutoffType: SimilarityCutoff, 535 | PaddingType: PaddingTrait, 536 | { 537 | normalized_similarity_with_args(self.s1.iter().copied(), s2, args) 538 | } 539 | } 540 | 541 | #[cfg(test)] 542 | mod tests { 543 | use super::*; 544 | 545 | fn 
assert_dist(dist: usize, str1: &str, str2: &str) { 546 | assert_eq!(Ok(dist), distance(str1.chars(), str2.chars())); 547 | } 548 | 549 | #[test] 550 | fn empty() { 551 | assert_dist(0, "", "") 552 | } 553 | 554 | #[test] 555 | fn same() { 556 | assert_dist(0, "hamming", "hamming") 557 | } 558 | 559 | #[test] 560 | fn numbers() { 561 | assert_eq!(Ok(1), distance([1, 2, 4], [1, 2, 3])); 562 | } 563 | 564 | #[test] 565 | fn diff() { 566 | assert_dist(3, "hamming", "hammers"); 567 | 568 | assert_eq!( 569 | 3, 570 | distance_with_args( 571 | "hammers".chars(), 572 | "hamming".chars(), 573 | &Args::default().pad(true) 574 | ) 575 | ); 576 | assert_eq!( 577 | Some(3), 578 | distance_with_args( 579 | "hammers".chars(), 580 | "hamming".chars(), 581 | &Args::default().pad(true).score_cutoff(3) 582 | ) 583 | ); 584 | assert_eq!( 585 | None, 586 | distance_with_args( 587 | "hammers".chars(), 588 | "hamming".chars(), 589 | &Args::default().pad(true).score_cutoff(2) 590 | ) 591 | ); 592 | assert_eq!( 593 | Ok(Some(3)), 594 | distance_with_args( 595 | "hammers".chars(), 596 | "hamming".chars(), 597 | &Args::default().score_cutoff(3) 598 | ) 599 | ); 600 | assert_eq!( 601 | Ok(None), 602 | distance_with_args( 603 | "hammers".chars(), 604 | "hamming".chars(), 605 | &Args::default().score_cutoff(2) 606 | ) 607 | ); 608 | } 609 | 610 | #[test] 611 | fn diff_multibyte() { 612 | assert_dist(2, "hamming", "h香mmüng"); 613 | } 614 | 615 | #[test] 616 | fn unequal_length() { 617 | assert_eq!( 618 | Err(Error::DifferentLengthArgs), 619 | distance("ham".chars(), "hamming".chars()) 620 | ); 621 | 622 | assert_eq!( 623 | 4, 624 | distance_with_args("ham".chars(), "hamming".chars(), &Args::default().pad(true)) 625 | ); 626 | 627 | assert_eq!( 628 | None, 629 | distance_with_args( 630 | "ham".chars(), 631 | "hamming".chars(), 632 | &Args::default().pad(true).score_cutoff(3) 633 | ) 634 | ); 635 | } 636 | 637 | #[test] 638 | fn names() { 639 | assert_dist(14, "Friedrich Nietzs", "Jean-Paul Sartre") 640 | } 641 | } 642 | -------------------------------------------------------------------------------- /src/distance/postfix.rs: -------------------------------------------------------------------------------- 1 | //! Postfix similarity 2 | //! 3 | //! The Postfix similarity measures the length of the common postfix between two 4 | //! sequences. 5 | //! 
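//!
//! A small illustrative sketch; the distance follows from
//! `distance = max(len1, len2) - similarity`:
//!
//! ```
//! use rapidfuzz::distance::postfix;
//!
//! // "postfix" and "prefix" share the common suffix "fix".
//! assert_eq!(3, postfix::similarity("postfix".chars(), "prefix".chars()));
//! // max(7, 6) - 3 = 4
//! assert_eq!(4, postfix::distance("postfix".chars(), "prefix".chars()));
//! ```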
6 | 7 | use crate::common::{DistanceCutoff, NoScoreCutoff, SimilarityCutoff, WithScoreCutoff}; 8 | use crate::details::common::find_common_suffix; 9 | use crate::details::distance::MetricUsize; 10 | use crate::HashableChar; 11 | 12 | #[must_use] 13 | #[derive(Copy, Clone, Debug)] 14 | pub struct Args { 15 | score_cutoff: CutoffType, 16 | score_hint: Option, 17 | } 18 | 19 | impl Default for Args { 20 | fn default() -> Args { 21 | Args { 22 | score_cutoff: NoScoreCutoff, 23 | score_hint: None, 24 | } 25 | } 26 | } 27 | 28 | impl Args { 29 | pub fn score_hint(mut self, score_hint: ResultType) -> Self { 30 | self.score_hint = Some(score_hint); 31 | self 32 | } 33 | 34 | pub fn score_cutoff( 35 | self, 36 | score_cutoff: ResultType, 37 | ) -> Args> { 38 | Args { 39 | score_hint: self.score_hint, 40 | score_cutoff: WithScoreCutoff(score_cutoff), 41 | } 42 | } 43 | } 44 | 45 | struct IndividualComparator; 46 | 47 | impl MetricUsize for IndividualComparator { 48 | fn maximum(&self, len1: usize, len2: usize) -> usize { 49 | len1.max(len2) 50 | } 51 | 52 | fn _similarity( 53 | &self, 54 | s1: Iter1, 55 | _len1: usize, 56 | s2: Iter2, 57 | _len2: usize, 58 | _score_cutoff: Option, 59 | _score_hint: Option, 60 | ) -> usize 61 | where 62 | Iter1: DoubleEndedIterator + Clone, 63 | Iter2: DoubleEndedIterator + Clone, 64 | Iter1::Item: PartialEq + HashableChar, 65 | Iter2::Item: PartialEq + HashableChar, 66 | { 67 | find_common_suffix(s1, s2) 68 | } 69 | } 70 | 71 | /// Postfix distance in the range [max, 0]. 72 | /// 73 | /// This is calculated as `max(len1, len2) - `[`similarity`]. 74 | /// 75 | pub fn distance(s1: Iter1, s2: Iter2) -> usize 76 | where 77 | Iter1: IntoIterator, 78 | Iter1::IntoIter: DoubleEndedIterator + Clone, 79 | Iter2: IntoIterator, 80 | Iter2::IntoIter: DoubleEndedIterator + Clone, 81 | Iter1::Item: PartialEq + HashableChar + Copy, 82 | Iter2::Item: PartialEq + HashableChar + Copy, 83 | { 84 | distance_with_args(s1, s2, &Args::default()) 85 | } 86 | 87 | pub fn distance_with_args( 88 | s1: Iter1, 89 | s2: Iter2, 90 | args: &Args, 91 | ) -> CutoffType::Output 92 | where 93 | Iter1: IntoIterator, 94 | Iter1::IntoIter: DoubleEndedIterator + Clone, 95 | Iter2: IntoIterator, 96 | Iter2::IntoIter: DoubleEndedIterator + Clone, 97 | Iter1::Item: PartialEq + HashableChar + Copy, 98 | Iter2::Item: PartialEq + HashableChar + Copy, 99 | CutoffType: DistanceCutoff, 100 | { 101 | let s1_iter = s1.into_iter(); 102 | let s2_iter = s2.into_iter(); 103 | args.score_cutoff.score(IndividualComparator {}._distance( 104 | s1_iter.clone(), 105 | s1_iter.count(), 106 | s2_iter.clone(), 107 | s2_iter.count(), 108 | args.score_cutoff.cutoff(), 109 | args.score_hint, 110 | )) 111 | } 112 | 113 | /// Postfix similarity 114 | /// 115 | /// Calculates the Postfix similarity. 
116 | /// 117 | /// # Examples 118 | /// 119 | /// ``` 120 | /// use rapidfuzz::distance::postfix; 121 | /// 122 | /// assert_eq!(3, postfix::similarity("postfix".chars(), "prefix".chars())); 123 | /// ``` 124 | pub fn similarity(s1: Iter1, s2: Iter2) -> usize 125 | where 126 | Iter1: IntoIterator, 127 | Iter1::IntoIter: DoubleEndedIterator + Clone, 128 | Iter2: IntoIterator, 129 | Iter2::IntoIter: DoubleEndedIterator + Clone, 130 | Iter1::Item: PartialEq + HashableChar + Copy, 131 | Iter2::Item: PartialEq + HashableChar + Copy, 132 | { 133 | similarity_with_args(s1, s2, &Args::default()) 134 | } 135 | 136 | pub fn similarity_with_args( 137 | s1: Iter1, 138 | s2: Iter2, 139 | args: &Args, 140 | ) -> CutoffType::Output 141 | where 142 | Iter1: IntoIterator, 143 | Iter1::IntoIter: DoubleEndedIterator + Clone, 144 | Iter2: IntoIterator, 145 | Iter2::IntoIter: DoubleEndedIterator + Clone, 146 | Iter1::Item: PartialEq + HashableChar + Copy, 147 | Iter2::Item: PartialEq + HashableChar + Copy, 148 | CutoffType: SimilarityCutoff, 149 | { 150 | let s1_iter = s1.into_iter(); 151 | let s2_iter = s2.into_iter(); 152 | args.score_cutoff.score(IndividualComparator {}._similarity( 153 | s1_iter.clone(), 154 | s1_iter.count(), 155 | s2_iter.clone(), 156 | s2_iter.count(), 157 | args.score_cutoff.cutoff(), 158 | args.score_hint, 159 | )) 160 | } 161 | 162 | /// Normalized Postfix distance in the range [1.0, 0.0] 163 | /// 164 | /// This is calculated as [`distance`]` / max(len1, len2)`. 165 | /// 166 | pub fn normalized_distance(s1: Iter1, s2: Iter2) -> f64 167 | where 168 | Iter1: IntoIterator, 169 | Iter1::IntoIter: DoubleEndedIterator + Clone, 170 | Iter2: IntoIterator, 171 | Iter2::IntoIter: DoubleEndedIterator + Clone, 172 | Iter1::Item: PartialEq + HashableChar + Copy, 173 | Iter2::Item: PartialEq + HashableChar + Copy, 174 | { 175 | normalized_distance_with_args(s1, s2, &Args::default()) 176 | } 177 | 178 | pub fn normalized_distance_with_args( 179 | s1: Iter1, 180 | s2: Iter2, 181 | args: &Args, 182 | ) -> CutoffType::Output 183 | where 184 | Iter1: IntoIterator, 185 | Iter1::IntoIter: DoubleEndedIterator + Clone, 186 | Iter2: IntoIterator, 187 | Iter2::IntoIter: DoubleEndedIterator + Clone, 188 | Iter1::Item: PartialEq + HashableChar + Copy, 189 | Iter2::Item: PartialEq + HashableChar + Copy, 190 | CutoffType: DistanceCutoff, 191 | { 192 | let s1_iter = s1.into_iter(); 193 | let s2_iter = s2.into_iter(); 194 | args.score_cutoff 195 | .score(IndividualComparator {}._normalized_distance( 196 | s1_iter.clone(), 197 | s1_iter.count(), 198 | s2_iter.clone(), 199 | s2_iter.count(), 200 | args.score_cutoff.cutoff(), 201 | args.score_hint, 202 | )) 203 | } 204 | 205 | /// Normalized Postfix similarity in the range [0.0, 1.0] 206 | /// 207 | /// This is calculated as `1.0 - `[`normalized_distance`]. 
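///
/// # Examples
///
/// A small illustrative sketch following from the relation above: the common
/// suffix "fix" gives `1.0 - 4/7 = 3/7`.
///
/// ```
/// use rapidfuzz::distance::postfix;
///
/// let sim = postfix::normalized_similarity("postfix".chars(), "prefix".chars());
/// assert!((sim - 3.0 / 7.0).abs() < 1e-9);
/// ```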
208 | /// 209 | pub fn normalized_similarity(s1: Iter1, s2: Iter2) -> f64 210 | where 211 | Iter1: IntoIterator, 212 | Iter1::IntoIter: DoubleEndedIterator + Clone, 213 | Iter2: IntoIterator, 214 | Iter2::IntoIter: DoubleEndedIterator + Clone, 215 | Iter1::Item: PartialEq + HashableChar + Copy, 216 | Iter2::Item: PartialEq + HashableChar + Copy, 217 | { 218 | normalized_similarity_with_args(s1, s2, &Args::default()) 219 | } 220 | 221 | pub fn normalized_similarity_with_args( 222 | s1: Iter1, 223 | s2: Iter2, 224 | args: &Args, 225 | ) -> CutoffType::Output 226 | where 227 | Iter1: IntoIterator, 228 | Iter1::IntoIter: DoubleEndedIterator + Clone, 229 | Iter2: IntoIterator, 230 | Iter2::IntoIter: DoubleEndedIterator + Clone, 231 | Iter1::Item: PartialEq + HashableChar + Copy, 232 | Iter2::Item: PartialEq + HashableChar + Copy, 233 | CutoffType: SimilarityCutoff, 234 | { 235 | let s1_iter = s1.into_iter(); 236 | let s2_iter = s2.into_iter(); 237 | args.score_cutoff 238 | .score(IndividualComparator {}._normalized_similarity( 239 | s1_iter.clone(), 240 | s1_iter.count(), 241 | s2_iter.clone(), 242 | s2_iter.count(), 243 | args.score_cutoff.cutoff(), 244 | args.score_hint, 245 | )) 246 | } 247 | 248 | /// `One x Many` comparisons using the Postfix similarity 249 | /// 250 | /// # Examples 251 | /// 252 | /// ``` 253 | /// use rapidfuzz::distance::postfix; 254 | /// 255 | /// let scorer = postfix::BatchComparator::new("postfix".chars()); 256 | /// assert_eq!(3, scorer.similarity("prefix".chars())); 257 | /// ``` 258 | #[derive(Clone)] 259 | pub struct BatchComparator { 260 | s1: Vec, 261 | } 262 | 263 | impl BatchComparator 264 | where 265 | Elem1: HashableChar + Clone, 266 | { 267 | pub fn new(s1: Iter1) -> Self 268 | where 269 | Iter1: IntoIterator, 270 | Iter1::IntoIter: Clone, 271 | { 272 | let s1_iter = s1.into_iter(); 273 | Self { 274 | s1: s1_iter.collect(), 275 | } 276 | } 277 | 278 | /// Normalized distance calculated similar to [`normalized_distance`] 279 | pub fn normalized_distance(&self, s2: Iter2) -> f64 280 | where 281 | Iter2: IntoIterator, 282 | Iter2::IntoIter: DoubleEndedIterator + Clone, 283 | Elem1: PartialEq + HashableChar + Copy, 284 | Iter2::Item: PartialEq + HashableChar + Copy, 285 | { 286 | normalized_distance(self.s1.iter().copied(), s2) 287 | } 288 | 289 | pub fn normalized_distance_with_args( 290 | &self, 291 | s2: Iter2, 292 | args: &Args, 293 | ) -> CutoffType::Output 294 | where 295 | Iter2: IntoIterator, 296 | Iter2::IntoIter: DoubleEndedIterator + Clone, 297 | Elem1: PartialEq + HashableChar + Copy, 298 | Iter2::Item: PartialEq + HashableChar + Copy, 299 | CutoffType: DistanceCutoff, 300 | { 301 | normalized_distance_with_args(self.s1.iter().copied(), s2, args) 302 | } 303 | 304 | /// Normalized similarity calculated similar to [`normalized_similarity`] 305 | pub fn normalized_similarity(&self, s2: Iter2) -> f64 306 | where 307 | Iter2: IntoIterator, 308 | Iter2::IntoIter: DoubleEndedIterator + Clone, 309 | Elem1: PartialEq + HashableChar + Copy, 310 | Iter2::Item: PartialEq + HashableChar + Copy, 311 | { 312 | normalized_similarity(self.s1.iter().copied(), s2) 313 | } 314 | 315 | pub fn normalized_similarity_with_args( 316 | &self, 317 | s2: Iter2, 318 | args: &Args, 319 | ) -> CutoffType::Output 320 | where 321 | Iter2: IntoIterator, 322 | Iter2::IntoIter: DoubleEndedIterator + Clone, 323 | Elem1: PartialEq + HashableChar + Copy, 324 | Iter2::Item: PartialEq + HashableChar + Copy, 325 | CutoffType: SimilarityCutoff, 326 | { 327 | 
normalized_similarity_with_args(self.s1.iter().copied(), s2, args) 328 | } 329 | 330 | /// Distance calculated similar to [`distance`] 331 | pub fn distance(&self, s2: Iter2) -> usize 332 | where 333 | Iter2: IntoIterator, 334 | Iter2::IntoIter: DoubleEndedIterator + Clone, 335 | Elem1: PartialEq + HashableChar + Copy, 336 | Iter2::Item: PartialEq + HashableChar + Copy, 337 | { 338 | distance(self.s1.iter().copied(), s2) 339 | } 340 | 341 | pub fn distance_with_args( 342 | &self, 343 | s2: Iter2, 344 | args: &Args, 345 | ) -> CutoffType::Output 346 | where 347 | Iter2: IntoIterator, 348 | Iter2::IntoIter: DoubleEndedIterator + Clone, 349 | Elem1: PartialEq + HashableChar + Copy, 350 | Iter2::Item: PartialEq + HashableChar + Copy, 351 | CutoffType: DistanceCutoff, 352 | { 353 | distance_with_args(self.s1.iter().copied(), s2, args) 354 | } 355 | 356 | /// Similarity calculated similar to [`similarity`] 357 | pub fn similarity(&self, s2: Iter2) -> usize 358 | where 359 | Iter2: IntoIterator, 360 | Iter2::IntoIter: DoubleEndedIterator + Clone, 361 | Elem1: PartialEq + HashableChar + Copy, 362 | Iter2::Item: PartialEq + HashableChar + Copy, 363 | { 364 | similarity(self.s1.iter().copied(), s2) 365 | } 366 | 367 | pub fn similarity_with_args( 368 | &self, 369 | s2: Iter2, 370 | args: &Args, 371 | ) -> CutoffType::Output 372 | where 373 | Iter2: IntoIterator, 374 | Iter2::IntoIter: DoubleEndedIterator + Clone, 375 | Elem1: PartialEq + HashableChar + Copy, 376 | Iter2::Item: PartialEq + HashableChar + Copy, 377 | CutoffType: SimilarityCutoff, 378 | { 379 | similarity_with_args(self.s1.iter().copied(), s2, args) 380 | } 381 | } 382 | -------------------------------------------------------------------------------- /src/distance/prefix.rs: -------------------------------------------------------------------------------- 1 | //! Prefix similarity 2 | //! 3 | //! The Prefix similarity measures the length of the common prefix between two 4 | //! sequences. 5 | //! 6 | 7 | use crate::common::{DistanceCutoff, NoScoreCutoff, SimilarityCutoff, WithScoreCutoff}; 8 | use crate::details::common::find_common_prefix; 9 | use crate::details::distance::MetricUsize; 10 | use crate::HashableChar; 11 | 12 | #[must_use] 13 | #[derive(Copy, Clone, Debug)] 14 | pub struct Args { 15 | score_cutoff: CutoffType, 16 | score_hint: Option, 17 | } 18 | 19 | impl Default for Args { 20 | fn default() -> Args { 21 | Args { 22 | score_cutoff: NoScoreCutoff, 23 | score_hint: None, 24 | } 25 | } 26 | } 27 | 28 | impl Args { 29 | pub fn score_hint(mut self, score_hint: ResultType) -> Self { 30 | self.score_hint = Some(score_hint); 31 | self 32 | } 33 | 34 | pub fn score_cutoff( 35 | self, 36 | score_cutoff: ResultType, 37 | ) -> Args> { 38 | Args { 39 | score_hint: self.score_hint, 40 | score_cutoff: WithScoreCutoff(score_cutoff), 41 | } 42 | } 43 | } 44 | 45 | struct IndividualComparator; 46 | 47 | impl MetricUsize for IndividualComparator { 48 | fn maximum(&self, len1: usize, len2: usize) -> usize { 49 | len1.max(len2) 50 | } 51 | 52 | fn _similarity( 53 | &self, 54 | s1: Iter1, 55 | _len1: usize, 56 | s2: Iter2, 57 | _len2: usize, 58 | _score_cutoff: Option, 59 | _score_hint: Option, 60 | ) -> usize 61 | where 62 | Iter1: Iterator + Clone, 63 | Iter2: Iterator + Clone, 64 | Iter1::Item: PartialEq + HashableChar, 65 | Iter2::Item: PartialEq + HashableChar, 66 | { 67 | find_common_prefix(s1, s2) 68 | } 69 | } 70 | 71 | /// Prefix distance in the range [max, 0]. 
72 | /// 73 | /// This is calculated as `max(len1, len2) - `[`similarity`]. 74 | /// 75 | pub fn distance(s1: Iter1, s2: Iter2) -> usize 76 | where 77 | Iter1: IntoIterator, 78 | Iter1::IntoIter: DoubleEndedIterator + Clone, 79 | Iter2: IntoIterator, 80 | Iter2::IntoIter: DoubleEndedIterator + Clone, 81 | Iter1::Item: PartialEq + HashableChar + Copy, 82 | Iter2::Item: PartialEq + HashableChar + Copy, 83 | { 84 | distance_with_args(s1, s2, &Args::default()) 85 | } 86 | 87 | pub fn distance_with_args( 88 | s1: Iter1, 89 | s2: Iter2, 90 | args: &Args, 91 | ) -> CutoffType::Output 92 | where 93 | Iter1: IntoIterator, 94 | Iter1::IntoIter: DoubleEndedIterator + Clone, 95 | Iter2: IntoIterator, 96 | Iter2::IntoIter: DoubleEndedIterator + Clone, 97 | Iter1::Item: PartialEq + HashableChar + Copy, 98 | Iter2::Item: PartialEq + HashableChar + Copy, 99 | CutoffType: DistanceCutoff, 100 | { 101 | let s1_iter = s1.into_iter(); 102 | let s2_iter = s2.into_iter(); 103 | args.score_cutoff.score(IndividualComparator {}._distance( 104 | s1_iter.clone(), 105 | s1_iter.count(), 106 | s2_iter.clone(), 107 | s2_iter.count(), 108 | args.score_cutoff.cutoff(), 109 | args.score_hint, 110 | )) 111 | } 112 | 113 | /// Prefix similarity 114 | /// 115 | /// Calculates the Prefix similarity. 116 | /// 117 | /// # Examples 118 | /// 119 | /// ``` 120 | /// use rapidfuzz::distance::prefix; 121 | /// 122 | /// assert_eq!(4, prefix::similarity("prefix".chars(), "preference".chars())); 123 | /// ``` 124 | pub fn similarity(s1: Iter1, s2: Iter2) -> usize 125 | where 126 | Iter1: IntoIterator, 127 | Iter1::IntoIter: DoubleEndedIterator + Clone, 128 | Iter2: IntoIterator, 129 | Iter2::IntoIter: DoubleEndedIterator + Clone, 130 | Iter1::Item: PartialEq + HashableChar + Copy, 131 | Iter2::Item: PartialEq + HashableChar + Copy, 132 | { 133 | similarity_with_args(s1, s2, &Args::default()) 134 | } 135 | 136 | pub fn similarity_with_args( 137 | s1: Iter1, 138 | s2: Iter2, 139 | args: &Args, 140 | ) -> CutoffType::Output 141 | where 142 | Iter1: IntoIterator, 143 | Iter1::IntoIter: DoubleEndedIterator + Clone, 144 | Iter2: IntoIterator, 145 | Iter2::IntoIter: DoubleEndedIterator + Clone, 146 | Iter1::Item: PartialEq + HashableChar + Copy, 147 | Iter2::Item: PartialEq + HashableChar + Copy, 148 | CutoffType: SimilarityCutoff, 149 | { 150 | let s1_iter = s1.into_iter(); 151 | let s2_iter = s2.into_iter(); 152 | args.score_cutoff.score(IndividualComparator {}._similarity( 153 | s1_iter.clone(), 154 | s1_iter.count(), 155 | s2_iter.clone(), 156 | s2_iter.count(), 157 | args.score_cutoff.cutoff(), 158 | args.score_hint, 159 | )) 160 | } 161 | 162 | /// Normalized Prefix distance in the range [1.0, 0.0] 163 | /// 164 | /// This is calculated as [`distance`]` / max(len1, len2)`. 
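///
/// # Examples
///
/// A small illustrative sketch: "prefix" and "preference" share the prefix
/// "pref", so the distance is `10 - 4 = 6` and the normalized distance `0.6`.
///
/// ```
/// use rapidfuzz::distance::prefix;
///
/// let norm = prefix::normalized_distance("prefix".chars(), "preference".chars());
/// assert!((norm - 0.6).abs() < 1e-9);
/// ```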
165 | /// 166 | pub fn normalized_distance(s1: Iter1, s2: Iter2) -> f64 167 | where 168 | Iter1: IntoIterator, 169 | Iter1::IntoIter: DoubleEndedIterator + Clone, 170 | Iter2: IntoIterator, 171 | Iter2::IntoIter: DoubleEndedIterator + Clone, 172 | Iter1::Item: PartialEq + HashableChar + Copy, 173 | Iter2::Item: PartialEq + HashableChar + Copy, 174 | { 175 | normalized_distance_with_args(s1, s2, &Args::default()) 176 | } 177 | 178 | pub fn normalized_distance_with_args( 179 | s1: Iter1, 180 | s2: Iter2, 181 | args: &Args, 182 | ) -> CutoffType::Output 183 | where 184 | Iter1: IntoIterator, 185 | Iter1::IntoIter: DoubleEndedIterator + Clone, 186 | Iter2: IntoIterator, 187 | Iter2::IntoIter: DoubleEndedIterator + Clone, 188 | Iter1::Item: PartialEq + HashableChar + Copy, 189 | Iter2::Item: PartialEq + HashableChar + Copy, 190 | CutoffType: DistanceCutoff, 191 | { 192 | let s1_iter = s1.into_iter(); 193 | let s2_iter = s2.into_iter(); 194 | args.score_cutoff 195 | .score(IndividualComparator {}._normalized_distance( 196 | s1_iter.clone(), 197 | s1_iter.count(), 198 | s2_iter.clone(), 199 | s2_iter.count(), 200 | args.score_cutoff.cutoff(), 201 | args.score_hint, 202 | )) 203 | } 204 | 205 | /// Normalized Prefix similarity in the range [0.0, 1.0] 206 | /// 207 | /// This is calculated as `1.0 - `[`normalized_distance`]. 208 | /// 209 | pub fn normalized_similarity(s1: Iter1, s2: Iter2) -> f64 210 | where 211 | Iter1: IntoIterator, 212 | Iter1::IntoIter: DoubleEndedIterator + Clone, 213 | Iter2: IntoIterator, 214 | Iter2::IntoIter: DoubleEndedIterator + Clone, 215 | Iter1::Item: PartialEq + HashableChar + Copy, 216 | Iter2::Item: PartialEq + HashableChar + Copy, 217 | { 218 | normalized_similarity_with_args(s1, s2, &Args::default()) 219 | } 220 | 221 | pub fn normalized_similarity_with_args( 222 | s1: Iter1, 223 | s2: Iter2, 224 | args: &Args, 225 | ) -> CutoffType::Output 226 | where 227 | Iter1: IntoIterator, 228 | Iter1::IntoIter: DoubleEndedIterator + Clone, 229 | Iter2: IntoIterator, 230 | Iter2::IntoIter: DoubleEndedIterator + Clone, 231 | Iter1::Item: PartialEq + HashableChar + Copy, 232 | Iter2::Item: PartialEq + HashableChar + Copy, 233 | CutoffType: SimilarityCutoff, 234 | { 235 | let s1_iter = s1.into_iter(); 236 | let s2_iter = s2.into_iter(); 237 | args.score_cutoff 238 | .score(IndividualComparator {}._normalized_similarity( 239 | s1_iter.clone(), 240 | s1_iter.count(), 241 | s2_iter.clone(), 242 | s2_iter.count(), 243 | args.score_cutoff.cutoff(), 244 | args.score_hint, 245 | )) 246 | } 247 | 248 | /// `One x Many` comparisons using the Prefix similarity 249 | /// 250 | /// # Examples 251 | /// 252 | /// ``` 253 | /// use rapidfuzz::distance::prefix; 254 | /// 255 | /// let scorer = prefix::BatchComparator::new("prefix".chars()); 256 | /// assert_eq!(4, scorer.similarity("preference".chars())); 257 | /// ``` 258 | #[derive(Clone)] 259 | pub struct BatchComparator { 260 | s1: Vec, 261 | } 262 | 263 | impl BatchComparator 264 | where 265 | Elem1: HashableChar + Clone, 266 | { 267 | pub fn new(s1: Iter1) -> Self 268 | where 269 | Iter1: IntoIterator, 270 | Iter1::IntoIter: Clone, 271 | { 272 | let s1_iter = s1.into_iter(); 273 | Self { 274 | s1: s1_iter.collect(), 275 | } 276 | } 277 | 278 | /// Normalized distance calculated similar to [`normalized_distance`] 279 | pub fn normalized_distance(&self, s2: Iter2) -> f64 280 | where 281 | Iter2: IntoIterator, 282 | Iter2::IntoIter: DoubleEndedIterator + Clone, 283 | Elem1: PartialEq + HashableChar + Copy, 284 | Iter2::Item: PartialEq + 
HashableChar + Copy, 285 | { 286 | normalized_distance(self.s1.iter().copied(), s2) 287 | } 288 | 289 | pub fn normalized_distance_with_args( 290 | &self, 291 | s2: Iter2, 292 | args: &Args, 293 | ) -> CutoffType::Output 294 | where 295 | Iter2: IntoIterator, 296 | Iter2::IntoIter: DoubleEndedIterator + Clone, 297 | Elem1: PartialEq + HashableChar + Copy, 298 | Iter2::Item: PartialEq + HashableChar + Copy, 299 | CutoffType: DistanceCutoff, 300 | { 301 | normalized_distance_with_args(self.s1.iter().copied(), s2, args) 302 | } 303 | 304 | /// Normalized similarity calculated similar to [`normalized_similarity`] 305 | pub fn normalized_similarity(&self, s2: Iter2) -> f64 306 | where 307 | Iter2: IntoIterator, 308 | Iter2::IntoIter: DoubleEndedIterator + Clone, 309 | Elem1: PartialEq + HashableChar + Copy, 310 | Iter2::Item: PartialEq + HashableChar + Copy, 311 | { 312 | normalized_similarity(self.s1.iter().copied(), s2) 313 | } 314 | 315 | pub fn normalized_similarity_with_args( 316 | &self, 317 | s2: Iter2, 318 | args: &Args, 319 | ) -> CutoffType::Output 320 | where 321 | Iter2: IntoIterator, 322 | Iter2::IntoIter: DoubleEndedIterator + Clone, 323 | Elem1: PartialEq + HashableChar + Copy, 324 | Iter2::Item: PartialEq + HashableChar + Copy, 325 | CutoffType: SimilarityCutoff, 326 | { 327 | normalized_similarity_with_args(self.s1.iter().copied(), s2, args) 328 | } 329 | 330 | /// Distance calculated similar to [`distance`] 331 | pub fn distance(&self, s2: Iter2) -> usize 332 | where 333 | Iter2: IntoIterator, 334 | Iter2::IntoIter: DoubleEndedIterator + Clone, 335 | Elem1: PartialEq + HashableChar + Copy, 336 | Iter2::Item: PartialEq + HashableChar + Copy, 337 | { 338 | distance(self.s1.iter().copied(), s2) 339 | } 340 | 341 | pub fn distance_with_args( 342 | &self, 343 | s2: Iter2, 344 | args: &Args, 345 | ) -> CutoffType::Output 346 | where 347 | Iter2: IntoIterator, 348 | Iter2::IntoIter: DoubleEndedIterator + Clone, 349 | Elem1: PartialEq + HashableChar + Copy, 350 | Iter2::Item: PartialEq + HashableChar + Copy, 351 | CutoffType: DistanceCutoff, 352 | { 353 | distance_with_args(self.s1.iter().copied(), s2, args) 354 | } 355 | 356 | /// Similarity calculated similar to [`similarity`] 357 | pub fn similarity(&self, s2: Iter2) -> usize 358 | where 359 | Iter2: IntoIterator, 360 | Iter2::IntoIter: DoubleEndedIterator + Clone, 361 | Elem1: PartialEq + HashableChar + Copy, 362 | Iter2::Item: PartialEq + HashableChar + Copy, 363 | { 364 | similarity(self.s1.iter().copied(), s2) 365 | } 366 | 367 | pub fn similarity_with_args( 368 | &self, 369 | s2: Iter2, 370 | args: &Args, 371 | ) -> CutoffType::Output 372 | where 373 | Iter2: IntoIterator, 374 | Iter2::IntoIter: DoubleEndedIterator + Clone, 375 | Elem1: PartialEq + HashableChar + Copy, 376 | Iter2::Item: PartialEq + HashableChar + Copy, 377 | CutoffType: SimilarityCutoff, 378 | { 379 | similarity_with_args(self.s1.iter().copied(), s2, args) 380 | } 381 | } 382 | -------------------------------------------------------------------------------- /src/fuzz.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{NoScoreCutoff, SimilarityCutoff, WithScoreCutoff}; 2 | use crate::details::distance::MetricUsize; 3 | use crate::distance::indel; 4 | use crate::HashableChar; 5 | 6 | #[must_use] 7 | #[derive(Clone, Copy, Debug)] 8 | pub struct Args { 9 | score_cutoff: CutoffType, 10 | score_hint: Option, 11 | } 12 | 13 | impl Default for Args { 14 | fn default() -> Args { 15 | Args { 16 | score_cutoff: 
NoScoreCutoff, 17 | score_hint: None, 18 | } 19 | } 20 | } 21 | 22 | impl Args { 23 | pub fn score_hint(mut self, score_hint: ResultType) -> Self { 24 | self.score_hint = Some(score_hint); 25 | self 26 | } 27 | 28 | pub fn score_cutoff( 29 | self, 30 | score_cutoff: ResultType, 31 | ) -> Args> { 32 | Args { 33 | score_hint: self.score_hint, 34 | score_cutoff: WithScoreCutoff(score_cutoff), 35 | } 36 | } 37 | } 38 | 39 | /// Returns a simple ratio between two strings or `None` if `ratio < score_cutoff` 40 | /// 41 | /// # Example 42 | /// ``` 43 | /// use rapidfuzz::fuzz; 44 | /// /// score is 0.9655 45 | /// let score = fuzz::ratio("this is a test".chars(), "this is a test!".chars()); 46 | /// ``` 47 | /// 48 | pub fn ratio(s1: Iter1, s2: Iter2) -> f64 49 | where 50 | Iter1: IntoIterator, 51 | Iter1::IntoIter: DoubleEndedIterator + Clone, 52 | Iter2: IntoIterator, 53 | Iter2::IntoIter: DoubleEndedIterator + Clone, 54 | Iter1::Item: PartialEq + HashableChar + Copy, 55 | Iter2::Item: PartialEq + HashableChar + Copy, 56 | { 57 | ratio_with_args(s1, s2, &Args::default()) 58 | } 59 | 60 | pub fn ratio_with_args( 61 | s1: Iter1, 62 | s2: Iter2, 63 | args: &Args, 64 | ) -> CutoffType::Output 65 | where 66 | Iter1: IntoIterator, 67 | Iter1::IntoIter: DoubleEndedIterator + Clone, 68 | Iter2: IntoIterator, 69 | Iter2::IntoIter: DoubleEndedIterator + Clone, 70 | Iter1::Item: PartialEq + HashableChar + Copy, 71 | Iter2::Item: PartialEq + HashableChar + Copy, 72 | CutoffType: SimilarityCutoff, 73 | { 74 | let s1_iter = s1.into_iter(); 75 | let s2_iter = s2.into_iter(); 76 | args.score_cutoff 77 | .score(indel::IndividualComparator {}._normalized_similarity( 78 | s1_iter.clone(), 79 | s1_iter.count(), 80 | s2_iter.clone(), 81 | s2_iter.count(), 82 | args.score_cutoff.cutoff(), 83 | args.score_hint, 84 | )) 85 | } 86 | 87 | /// `One x Many` comparisons using `ratio` 88 | /// 89 | /// # Examples 90 | /// 91 | /// ``` 92 | /// use rapidfuzz::fuzz; 93 | /// 94 | /// let scorer = fuzz::RatioBatchComparator::new("this is a test".chars()); 95 | /// /// score is 0.9655 96 | /// let score = scorer.similarity("this is a test!".chars()); 97 | /// ``` 98 | pub struct RatioBatchComparator { 99 | scorer: indel::BatchComparator, 100 | } 101 | 102 | impl RatioBatchComparator 103 | where 104 | Elem1: HashableChar + Clone, 105 | { 106 | pub fn new(s1: Iter1) -> Self 107 | where 108 | Iter1: IntoIterator, 109 | Iter1::IntoIter: Clone, 110 | { 111 | Self { 112 | scorer: indel::BatchComparator::new(s1), 113 | } 114 | } 115 | 116 | /// Similarity calculated similar to [`ratio`] 117 | pub fn similarity(&self, s2: Iter2) -> f64 118 | where 119 | Iter2: IntoIterator, 120 | Iter2::IntoIter: DoubleEndedIterator + Clone, 121 | Elem1: PartialEq + HashableChar + Copy, 122 | Iter2::Item: PartialEq + HashableChar + Copy, 123 | { 124 | self.similarity_with_args(s2, &Args::default()) 125 | } 126 | 127 | pub fn similarity_with_args( 128 | &self, 129 | s2: Iter2, 130 | args: &Args, 131 | ) -> CutoffType::Output 132 | where 133 | Iter2: IntoIterator, 134 | Iter2::IntoIter: DoubleEndedIterator + Clone, 135 | Elem1: PartialEq + HashableChar + Copy, 136 | Iter2::Item: PartialEq + HashableChar + Copy, 137 | CutoffType: SimilarityCutoff, 138 | { 139 | let s2_iter = s2.into_iter(); 140 | args.score_cutoff 141 | .score(self.scorer.scorer._normalized_similarity( 142 | self.scorer.scorer.s1.iter().copied(), 143 | self.scorer.scorer.s1.len(), 144 | s2_iter.clone(), 145 | s2_iter.count(), 146 | args.score_cutoff.cutoff(), 147 | args.score_hint, 148 | 
)) 149 | } 150 | } 151 | 152 | #[cfg(test)] 153 | mod tests { 154 | use super::*; 155 | 156 | static S1: &str = "new york mets"; 157 | static S3: &str = "the wonderful new york mets"; 158 | //static S4: &str = "new york mets vs atlanta braves"; 159 | //static S5: &str = "atlanta braves vs new york mets"; 160 | //static S7: &str = "new york city mets - atlanta braves"; 161 | // test silly corner cases 162 | static S8: &str = "{"; 163 | static S9: &str = "{a"; 164 | //static S10: &str = "a{"; 165 | //static S10A: &str = "{b"; 166 | 167 | macro_rules! assert_delta { 168 | ($x:expr, $y:expr) => { 169 | match ($x, $y) { 170 | (None, None) => {} 171 | (Some(val1), Some(val2)) => { 172 | if (val1 - val2).abs() > 0.0001 { 173 | panic!("{:?} != {:?}", $x, $y); 174 | } 175 | } 176 | (_, _) => panic!("{:?} != {:?}", $x, $y), 177 | } 178 | }; 179 | } 180 | 181 | #[test] 182 | fn test_equal() { 183 | assert_delta!( 184 | Some(1.0), 185 | Some(ratio_with_args(S1.chars(), S1.chars(), &Args::default())) 186 | ); 187 | assert_delta!( 188 | Some(1.0), 189 | Some(ratio_with_args( 190 | "test".chars(), 191 | "test".chars(), 192 | &Args::default() 193 | )) 194 | ); 195 | assert_delta!( 196 | Some(1.0), 197 | Some(ratio_with_args(S8.chars(), S8.chars(), &Args::default())) 198 | ); 199 | assert_delta!( 200 | Some(1.0), 201 | Some(ratio_with_args(S9.chars(), S9.chars(), &Args::default())) 202 | ); 203 | } 204 | 205 | #[test] 206 | fn test_partial_ratio() { 207 | //assert_delta!(Some(1.0), partial_ratio(S1.chars(), S1.chars(), None, None)); 208 | assert_delta!( 209 | Some(0.65), 210 | Some(ratio_with_args(S1.chars(), S3.chars(), &Args::default())) 211 | ); 212 | //assert_delta!(Some(1.0), partial_ratio(S1.chars(), S3.chars(), None, None)); 213 | } 214 | 215 | #[test] 216 | fn two_empty_strings() { 217 | assert_delta!( 218 | Some(1.0), 219 | Some(ratio_with_args("".chars(), "".chars(), &Args::default())) 220 | ); 221 | } 222 | 223 | #[test] 224 | fn first_string_empty() { 225 | assert_delta!( 226 | Some(0.0), 227 | Some(ratio_with_args( 228 | "test".chars(), 229 | "".chars(), 230 | &Args::default() 231 | )) 232 | ); 233 | } 234 | 235 | #[test] 236 | fn second_string_empty() { 237 | assert_delta!( 238 | Some(0.0), 239 | Some(ratio_with_args( 240 | "".chars(), 241 | "test".chars(), 242 | &Args::default() 243 | )) 244 | ); 245 | } 246 | 247 | // https://github.com/rapidfuzz/RapidFuzz/issues/206 248 | #[test] 249 | fn issue206() { 250 | let str1 = "South Korea"; 251 | let str2 = "North Korea"; 252 | 253 | { 254 | let score = ratio(str1.chars(), str2.chars()); 255 | 256 | assert_eq!( 257 | None, 258 | ratio_with_args( 259 | str1.chars(), 260 | str2.chars(), 261 | &Args::default().score_cutoff(score + 0.0001) 262 | ) 263 | ); 264 | assert_delta!( 265 | Some(score), 266 | ratio_with_args( 267 | str1.chars(), 268 | str2.chars(), 269 | &Args::default().score_cutoff(score - 0.0001) 270 | ) 271 | ); 272 | } 273 | } 274 | 275 | // https://github.com/rapidfuzz/RapidFuzz/issues/210 276 | #[test] 277 | fn issue210() { 278 | let str1 = "bc"; 279 | let str2 = "bca"; 280 | 281 | { 282 | let score = ratio(str1.chars(), str2.chars()); 283 | 284 | assert_eq!( 285 | None, 286 | ratio_with_args( 287 | str1.chars(), 288 | str2.chars(), 289 | &Args::default().score_cutoff(score + 0.0001) 290 | ) 291 | ); 292 | assert_delta!( 293 | Some(score), 294 | ratio_with_args( 295 | str1.chars(), 296 | str2.chars(), 297 | &Args::default().score_cutoff(score - 0.0001) 298 | ) 299 | ); 300 | } 301 | } 302 | } 303 | 
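Taken together, `ratio`, `ratio_with_args` and `RatioBatchComparator` cover the common call patterns of this module. The standalone snippet below is an illustrative sketch of those patterns, not a file from the repository; the `fn main` wrapper, the 0.8 cutoff and the tolerance values are arbitrary choices, while the expected scores (about 0.9655 and 0.65) come from the doc comment and tests above.

```rust
use rapidfuzz::fuzz;

fn main() {
    // Indel-based normalized similarity in the range [0.0, 1.0];
    // the doc example above puts this score at roughly 0.9655.
    let score = fuzz::ratio("this is a test".chars(), "this is a test!".chars());
    assert!((score - 0.9655).abs() < 0.0001);

    // With a score_cutoff the result becomes an Option<f64>:
    // None when the similarity falls below the cutoff.
    let filtered = fuzz::ratio_with_args(
        "new york mets".chars(),
        "the wonderful new york mets".chars(),
        &fuzz::Args::default().score_cutoff(0.8),
    );
    assert_eq!(None, filtered); // the tests above put this pair at ~0.65

    // One-to-many comparisons reuse the preprocessed first sequence.
    let scorer = fuzz::RatioBatchComparator::new("this is a test".chars());
    assert!(scorer.similarity("this is a test!".chars()) > 0.9);
}
```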
-------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! `RapidFuzz` is a general purpose string matching library with implementations 2 | //! for Rust, C++ and Python. 3 | //! 4 | //! ## Key Features 5 | //! 6 | //! - **Diverse String Metrics**: Offers a variety of string metrics 7 | //! to suit different use cases. These range from the Levenshtein 8 | //! distance for edit-based comparisons to the Jaro-Winkler similarity for 9 | //! more nuanced similarity assessments. 10 | //! - **Optimized for Speed**: The library is designed with performance in mind. 11 | //! Each implementation is carefully tuned to ensure optimal performance, 12 | //! making it suitable for the analysis of large datasets. 13 | //! - **Easy to use**: The API is designed to be simple to use, while still giving 14 | //! the implementation room for optimization. 15 | //! 16 | //! ## Installation 17 | //! 18 | //! The installation is as simple as: 19 | //! ```console 20 | //! $ cargo add rapidfuzz 21 | //! ``` 22 | //! 23 | //! ## Usage 24 | //! 25 | //! The following examples show the usage with the [`Levenshtein`] distance. Other metrics 26 | //! can be found in the [`fuzz`] and [`distance`] modules. 27 | //! 28 | //! ```rust 29 | //! use rapidfuzz::distance::levenshtein; 30 | //! 31 | //! // Perform a simple comparison using the Levenshtein distance 32 | //! assert_eq!( 33 | //! 3, 34 | //! levenshtein::distance("kitten".chars(), "sitting".chars()) 35 | //! ); 36 | //! 37 | //! // If you are sure the input strings are ASCII only, it's usually faster to operate on bytes 38 | //! assert_eq!( 39 | //! 3, 40 | //! levenshtein::distance("kitten".bytes(), "sitting".bytes()) 41 | //! ); 42 | //! 43 | //! // You can provide a score_cutoff value to filter out strings whose distance is worse than 44 | //! // the score_cutoff 45 | //! assert_eq!( 46 | //! None, 47 | //! levenshtein::distance_with_args( 48 | //! "kitten".chars(), 49 | //! "sitting".chars(), 50 | //! &levenshtein::Args::default().score_cutoff(2) 51 | //! ) 52 | //! ); 53 | //! 54 | //! // You can provide a score_hint to tell the implementation about the expected score. 55 | //! // This can be used to select a more performant implementation internally, but might cause 56 | //! // a slowdown in cases where the distance is actually worse than the score_hint 57 | //! assert_eq!( 58 | //! 3, 59 | //! levenshtein::distance_with_args( 60 | //! "kitten".chars(), 61 | //! "sitting".chars(), 62 | //! &levenshtein::Args::default().score_hint(2) 63 | //! ) 64 | //! ); 65 | //! 66 | //! // When comparing a single string to multiple strings you can use the 67 | //! // provided `BatchComparators`. These can cache part of the calculation, 68 | //! // which can provide significant speedups 69 | //! let scorer = levenshtein::BatchComparator::new("kitten".chars()); 70 | //! assert_eq!(3, scorer.distance("sitting".chars())); 71 | //! assert_eq!(0, scorer.distance("kitten".chars())); 72 | //! ``` 73 | //! 74 | //! [`Levenshtein`]: distance/levenshtein/index.html 75 | //! [`fuzz`]: fuzz/index.html 76 | //! [`distance`]: distance/index.html 77 | 78 | #![forbid(unsafe_code)] 79 | #![allow( 80 | // these casts are sometimes needed.
They restrict the length of input iterators 81 | // but there isn't really any way around this except for always working with 82 | // 128 bit types 83 | clippy::cast_possible_truncation, 84 | clippy::cast_possible_wrap, 85 | clippy::cast_sign_loss, 86 | clippy::cast_precision_loss, 87 | // things are often more readable this way 88 | clippy::module_name_repetitions, 89 | // not practical 90 | clippy::needless_pass_by_value, 91 | clippy::similar_names, 92 | clippy::too_many_lines, 93 | // noisy 94 | clippy::missing_errors_doc, 95 | )] 96 | 97 | pub mod common; 98 | pub(crate) mod details; 99 | pub mod distance; 100 | pub mod fuzz; 101 | 102 | /// Hash value in the range `i64::MIN` - `u64::MAX` 103 | #[derive(Debug, Copy, Clone)] 104 | pub enum Hash { 105 | UNSIGNED(u64), 106 | SIGNED(i64), 107 | } 108 | 109 | /// Trait used to map between element types and unique hash values 110 | /// 111 | /// `RapidFuzz` already implements this trait for most primitive types. 112 | /// For custom types this trait can be used to support the internal hashmaps. 113 | /// There are a couple of things to keep in mind when implementing this trait: 114 | /// - hashes have to be unique values in the range `i64::MIN` - `u64::MAX`. 115 | /// If two distinct objects produce the same hash, they will be assumed to be similar 116 | /// by the hashmap. 117 | /// - the hash function should be very fast. For primitive types it can just be the identity 118 | /// function 119 | /// - the hashmaps are optimized for extended ASCII, so values in the range 0-255 generally 120 | /// provide better performance. 121 | /// 122 | /// # Example 123 | /// ``` 124 | /// use rapidfuzz::distance; 125 | /// use rapidfuzz::{Hash, HashableChar}; 126 | /// 127 | /// #[derive(PartialEq)] 128 | /// struct MyType { 129 | /// val: u64, 130 | /// } 131 | /// 132 | /// impl HashableChar for &MyType { 133 | /// fn hash_char(&self) -> Hash { 134 | /// Hash::UNSIGNED(self.val) 135 | /// } 136 | /// } 137 | /// 138 | /// assert_eq!( 139 | /// 1, 140 | /// distance::levenshtein::distance( 141 | /// &[MyType { val: 1 }, MyType { val: 1 }], 142 | /// &[MyType { val: 2 }, MyType { val: 1 }], 143 | /// ) 144 | /// ); 145 | /// ``` 146 | pub trait HashableChar { 147 | fn hash_char(&self) -> Hash; 148 | } 149 | --------------------------------------------------------------------------------
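The doc comments in `src/distance/prefix.rs` state the formulas but only `similarity` carries a doctest, so the illustrative sketch below works them through once: `distance = max(len1, len2) - similarity`, `normalized_distance = distance / max(len1, len2)` and `normalized_similarity = 1.0 - normalized_distance`. The `fn main` wrapper, the cutoff of 5 and the float tolerances are arbitrary choices; the value 4 for the shared prefix of `"prefix"` and `"preference"` is the one from the doctest.

```rust
use rapidfuzz::distance::prefix;

fn main() {
    // "prefix" and "preference" share the 4-character prefix "pref".
    assert_eq!(4, prefix::similarity("prefix".chars(), "preference".chars()));

    // distance = max(6, 10) - 4 = 6
    assert_eq!(6, prefix::distance("prefix".chars(), "preference".chars()));

    // normalized_distance = 6 / 10 = 0.6
    let norm_dist = prefix::normalized_distance("prefix".chars(), "preference".chars());
    assert!((norm_dist - 0.6).abs() < 1e-9);

    // normalized_similarity = 1.0 - 0.6 = 0.4
    let norm_sim = prefix::normalized_similarity("prefix".chars(), "preference".chars());
    assert!((norm_sim - 0.4).abs() < 1e-9);

    // With a score_cutoff the distance variant returns an Option<usize>;
    // a distance of 6 is worse than the cutoff of 5, so the result is None.
    assert_eq!(
        None,
        prefix::distance_with_args(
            "prefix".chars(),
            "preference".chars(),
            &prefix::Args::default().score_cutoff(5),
        )
    );
}
```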