├── .gitignore
├── .github
│   └── workflows
│       ├── main.yml
│       └── release.yml
├── tests
│   ├── parse_tests.rs
│   ├── list_reading_tests.rs
│   ├── ignore_tests.rs
│   ├── edit_distance_tests.rs
│   ├── pruning_tests.rs
│   ├── uniquely_decodable_tests.rs
│   ├── list_information_tests.rs
│   └── list_manipulation_tests.rs
├── Cargo.toml
├── dist-workspace.toml
├── LICENSE
├── Dockerfile
├── src
│   ├── cards.rs
│   ├── display_information
│   │   ├── uniquely_decodable.rs
│   │   └── mod.rs
│   ├── schlinkert_pruning.rs
│   ├── edit_distance.rs
│   ├── file_writer.rs
│   ├── parsers.rs
│   ├── dice.rs
│   ├── input_validations.rs
│   ├── file_readers.rs
│   ├── list_manipulations.rs
│   ├── lib.rs
│   └── main.rs
├── CHANGELOG.md
├── wordlists-to-tidy.markdown
├── deny.toml
└── Cargo.lock

/.gitignore:
--------------------------------------------------------------------------------
/target
l
scrap_code.markdown
scrap_code.mdown
scrap_code.txt
--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
name: ci

on:
  push:
    branches:
      - "main"

jobs:
  build-and-push-docker-image:
    name: Build and push the Docker image
    runs-on: ubuntu-latest
    steps:
      - name: Login to GitHub Packages Docker Registry
        uses: docker/login-action@v1
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Build
        uses: docker/build-push-action@v3
        with:
          platforms: linux/amd64,linux/arm64
          push: true
--------------------------------------------------------------------------------
/tests/parse_tests.rs:
--------------------------------------------------------------------------------
mod parse_tests {
    use tidy::parsers::eval_list_length;

    #[test]
    fn can_parse_print_rand() {
        assert_eq!(eval_list_length("7776").unwrap(), 7776);
        assert_eq!(eval_list_length("6**5").unwrap(), 7776);
        assert_eq!(eval_list_length("10000").unwrap(), 10000);
        assert_eq!(eval_list_length("10**2").unwrap(), 100);
    }

    #[test]
    fn panics_when_noninteger_is_inputted_to_print_rand() {
        assert!(eval_list_length("four").is_err());
    }

    #[test]
    fn panics_when_too_many_exponents_inputted_to_print_rand() {
        assert!(eval_list_length("2**4**3").is_err());
    }
}
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
[package]
name = "tidy"
version = "0.3.17"
authors = ["sts10 "]
edition = "2024"
license = "MIT"
readme = "readme.markdown"
repository = "https://github.com/sts10/tidy"
description = "Combine and clean word lists"
categories = ["command-line-utilities"]

[dependencies]
clap = { version = "4.5.18", features = ["derive"] }
memchr = "2.7.4"
radix_fmt = "1.0.0"
rand = "0.9.0"
itertools = "0.14.0"
unicode-normalization = "0.1.24"
unicode-segmentation = "1.12.0"
# icu = "1.5.0"
icu = "2.0.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"

# The profile that 'cargo dist' will build with
[profile.dist]
inherits = "release"
lto = "thin"
--------------------------------------------------------------------------------
/dist-workspace.toml:
--------------------------------------------------------------------------------
[workspace]
members = ["cargo:."]

# Config for 'dist'
[dist]
# The preferred dist version to use in CI (Cargo.toml SemVer syntax)
cargo-dist-version = "0.30.2"
# CI backends to support
ci = "github"
# The installers to generate for each app
installers = ["shell"]
# Target platforms to build apps for (Rust target-triple syntax)
targets = ["aarch64-apple-darwin", "aarch64-unknown-linux-gnu", "aarch64-pc-windows-msvc", "x86_64-apple-darwin", "x86_64-unknown-linux-gnu", "x86_64-unknown-linux-musl", "x86_64-pc-windows-msvc"]
# Which actions to run on pull requests
pr-run-mode = "plan"
# Whether to install an updater program
install-updater = false
# Path that installers should place binaries in
install-path = "CARGO_HOME"
--------------------------------------------------------------------------------
/tests/list_reading_tests.rs:
--------------------------------------------------------------------------------
mod list_reading_tests {
    use tidy::file_readers::blend;
    use tidy::*;

    #[test]
    fn can_blend_multiple_lists() {
        let word_lists_by_file = vec![
            vec!["one".to_string(), "three".to_string(), "five".to_string()],
            vec![
                "two".to_string(),
                "four".to_string(),
                "six".to_string(),
                "eight".to_string(),
                "ten".to_string(),
            ],
        ];
        let blended_list = blend(&word_lists_by_file);

        assert_eq!(
            blended_list,
            ["one", "two", "three", "four", "five", "six", "eight", "ten"].to_vec()
        );
    }
}
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2020-2024 Sam Schlinkert

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/tests/ignore_tests.rs:
--------------------------------------------------------------------------------
mod ignore_tests {
    use tidy::*;

    fn make_list() -> Vec<String> {
        vec!["mA1,word1 mB1", "mA2,word2 mB2", "mA3,word3 mB3", "A,B,C"]
            .iter()
            .map(|x| x.to_string())
            .collect()
    }

    #[test]
    fn can_ignore_metadata_before_a_delimiter() {
        let this_tidy_request = TidyRequest {
            list: make_list(),
            ignore_before_delimiter: Some(','),
            maximum_length: Some(10),
            ..Default::default()
        };
        let new_list = tidy_list(this_tidy_request);
        assert!(new_list.contains(&"mA1,word1 mB1".to_string()));
    }
    #[test]
    fn can_ignore_metadata_after_a_delimiter() {
        let this_tidy_request = TidyRequest {
            list: make_list(),
            ignore_after_delimiter: Some(' '),
            maximum_length: Some(10),
            ..Default::default()
        };
        let new_list = tidy_list(this_tidy_request);
        println!("{:?}", new_list);
        assert!(new_list.contains(&"mA1,word1 mB1".to_string()));
    }
}
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM rust:alpine as builder
# Workaround "cannot find crti.o"
RUN apk add --no-cache musl-dev
WORKDIR /app
# First, only copy over the Cargo manifests, and do a build with a dummy
# main.rs. This way, Docker can cache all of this, and only actually download
# and build those dependencies once. This is nice, because otherwise it's
# quite slow.
COPY Cargo.toml Cargo.lock ./
RUN mkdir src && echo "fn main() { }" > src/main.rs
RUN cargo build --release
RUN rm -r src/
# The above should get cached; but the below steps will be executed each
# time you change the source code and do 'docker build'.
COPY . .
RUN cargo build --release
RUN strip target/release/tidy

FROM scratch
COPY --from=builder /app/target/release/tidy /bin/tidy
ENTRYPOINT ["/bin/tidy"]

# Add some metadata about the image, for extra neatness.
LABEL org.opencontainers.image.title="tidy" \
    org.opencontainers.image.description="A command-line tool for combining and cleaning large word list files" \
    org.opencontainers.image.url="https://github.com/sts10/tidy" \
    org.opencontainers.image.authors="Sam Schlinkert"
--------------------------------------------------------------------------------
/tests/pruning_tests.rs:
--------------------------------------------------------------------------------
mod pruning_tests {
    use tidy::display_information::uniquely_decodable::is_uniquely_decodable;
    use tidy::*;

    #[test]
    fn wont_remove_words_from_a_list_that_is_already_uniquely_decodable() {
        let list: Vec<String> = vec!["101", "00", "0001", "1"]
            .iter()
            .map(|w| w.to_string())
            .collect();
        let this_tidy_request = TidyRequest {
            list: list.clone(),
            should_schlinkert_prune: true,
            ..Default::default()
        };
        let new_list = tidy_list(this_tidy_request);
        assert_eq!(list, new_list);
    }

    #[test]
    fn can_run_schlinkert_prune_on_reversed_list_if_it_saves_more_words() {
        let list: Vec<String> = vec![
            "news",
            "paper",
            "newspaper",
            "donkey",
            "newsdonkey",
            "ghost",
            "newsghost",
            "radish",
            "newsradish",
        ]
        .iter()
        .map(|w| w.to_string())
        .collect();

        let this_tidy_request = TidyRequest {
            list,
            should_schlinkert_prune: true,
            ..Default::default()
        };
        let new_list = tidy_list(this_tidy_request);
        // If the Schlinkert prune were done forward, only
        // 5 words would be saved. But if we Schlinkert
        // prune the reversed list, we save 8 words.
        assert!(new_list.len() == 8);
        // And now let's confirm that the new list is indeed
        // uniquely decodable, at least as far as Tidy is able
        // to confirm.
        assert!(is_uniquely_decodable(&new_list));
    }
}
--------------------------------------------------------------------------------
/src/cards.rs:
--------------------------------------------------------------------------------
use radix_fmt::*; // https://stackoverflow.com/a/50278316

/// Convert a number to a "card code" (base 26)
pub fn print_as_cards(n: usize, list_length: usize) -> String {
    let n_as_base_26 = radix(n, 26);

    // Pad card codes with zeros
    let n_width = n_as_base_26.to_string().len();
    let pad_width = radix(list_length - 1, 26).to_string().len();

    let mut padded_n = String::new();
    for _i in n_width..pad_width {
        padded_n.push('0');
    }
    // Now that we have the appropriate number of zeros
    // in `padded_n`, it's time to add our number
    padded_n += &n_as_base_26.to_string();

    padded_n
        .chars()
        .map(|ch| char_to_card(ch) + "-")
        .collect::<String>()
        .trim_end_matches('-')
        .trim()
        .to_string()
}

/// Convert a 0-p inputted character to a 3-character "card code"
fn char_to_card(ch: char) -> String {
    match ch {
        '0' => "B02",
        '1' => "B03",
        '2' => "B04",
        '3' => "B05",
        '4' => "B06",
        '5' => "B07",
        '6' => "B08",
        '7' => "B09",
        '8' => "B10",
        '9' => "BJa",
        'a' => "BQu",
        'b' => "BKi",
        'c' => "BAc",
        'd' => "R02",
        'e' => "R03",
        'f' => "R04",
        'g' => "R05",
        'h' => "R06",
        'i' => "R07",
        'j' => "R08",
        'k' => "R09",
        'l' => "R10",
        'm' => "RJa",
        'n' => "RQu",
        'o' => "RKi",
        'p' => "RAc",
        _ => panic!("Unable to convert this number from a letter to a card code."),
    }
    .to_string()
}
--------------------------------------------------------------------------------
/src/display_information/uniquely_decodable.rs:
--------------------------------------------------------------------------------
//! This is a (rather clumsy) implementation of the Sardinas-Patterson algorithm
//! by Sam Schlinkert.
//! The goal is to check if a word list (`c`) is uniquely decodable.
//!
//! I followed
//!
//! very closely. Since then, other contributors have refactored it.
use std::collections::HashSet;

/// Return true if the list is uniquely decodable, false if not. I
/// don't _think_ we need to check reversed words in this case.
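///
/// A quick doc-test sketch of the expected behavior ("newspaper" below can
/// be spelled by "news" + "paper", so that list is not uniquely decodable):
/// ```
/// use tidy::display_information::uniquely_decodable::is_uniquely_decodable;
/// assert!(is_uniquely_decodable(&["cat", "dog", "fish"]));
/// assert!(!is_uniquely_decodable(&["news", "paper", "newspaper"]));
/// ```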
pub fn is_uniquely_decodable<T: AsRef<str>>(c: &[T]) -> bool {
    sardinas_patterson_theorem(c.iter().map(|f| f.as_ref()).collect())
}

/// Generate c for any number n
fn generate_cn<'a>(c: &HashSet<&'a str>, cn_minus_1: &HashSet<&'a str>) -> HashSet<&'a str> {
    let mut cn = HashSet::new();

    for w1 in c.iter() {
        for w2 in cn_minus_1.iter() {
            if w1.len() > w2.len() && w1.starts_with(w2) {
                // w2 is a prefix word of w1
                // so, we're going to add the dangling suffix to a new HashSet
                // called cn
                cn.insert(&w1[w2.len()..]);
            }
            if w2.len() > w1.len() && w2.starts_with(w1) {
                // w1 is a prefix word of w2
                // so, we're going to add the dangling suffix to a new HashSet
                // called cn
                cn.insert(&w2[w1.len()..]);
            }
        }
    }
    cn
}

fn generate_c_infinity_with_a_halt_break<'a>(c: &'a HashSet<&str>) -> HashSet<&'a str> {
    let mut cn = generate_cn(c, c);
    let mut cs = cn.clone();

    loop {
        cn = generate_cn(c, &cn);
        let prior = cs.len();
        cs.extend(&cn);
        if cs.len() == prior {
            // if the set size did not increase, cn is a subset
            // Cycle detected. Halting algorithm.
            break;
        }
    }
    cs
}

/// Returns true if c is uniquely decodable
fn sardinas_patterson_theorem(c: HashSet<&str>) -> bool {
    let c_infinity = generate_c_infinity_with_a_halt_break(&c);
    c.is_disjoint(&c_infinity)
}
--------------------------------------------------------------------------------
/src/schlinkert_pruning.rs:
--------------------------------------------------------------------------------
use std::collections::HashSet;

/// Return a Vector of words that "caused" the Sardinas-Patterson algorithm to
/// determine that this list was not uniquely decodable.
/// These "offending" words can then be removed from the original
/// list to, theoretically, make the list uniquely decodable.
pub fn get_sardinas_patterson_final_intersection<T: AsRef<str>>(c: &[T]) -> Vec<String> {
    // Convert c to a HashSet, I think
    let c = c.iter().map(|f| f.as_ref()).collect();

    let c_infinity = generate_c_infinity_with_a_halt_break(&c);
    // We want to collect a list of words that "caused" the Sardinas-Patterson algorithm
    // to determine that this list was not uniquely decodable.
    // If the given list is in fact uniquely decodable, this list of words will be empty.
    // If there are words in the list, we'll return those to src/lib to be
    // removed from the final list.
    let final_intersection = c.intersection(&c_infinity);
    Vec::from_iter(final_intersection)
        .iter()
        .map(|w| w.to_string())
        .collect()
}

/// Generate c for any number n
fn generate_cn<'a>(c: &HashSet<&'a str>, cn_minus_1: &HashSet<&'a str>) -> HashSet<&'a str> {
    let mut cn = HashSet::new();

    for w1 in c.iter() {
        for w2 in cn_minus_1.iter() {
            if w1.len() > w2.len() && w1.starts_with(w2) {
                // w2 is a prefix word of w1
                // so, we're going to add the dangling suffix to a new HashSet
                // called cn
                cn.insert(&w1[w2.len()..]);
            }
            if w2.len() > w1.len() && w2.starts_with(w1) {
                // w1 is a prefix word of w2
                // so, we're going to add the dangling suffix to a new HashSet
                // called cn
                cn.insert(&w2[w1.len()..]);
            }
        }
    }
    cn
}

fn generate_c_infinity_with_a_halt_break<'a>(c: &'a HashSet<&str>) -> HashSet<&'a str> {
    let mut cn = generate_cn(c, c);
    let mut cs = cn.clone();

    loop {
        cn = generate_cn(c, &cn);
        let prior = cs.len();
        cs.extend(&cn);
        if cs.len() == prior {
            // if the set size did not increase, cn is a subset
            // Cycle detected. Halting algorithm.
            break;
        }
    }
    cs
}
--------------------------------------------------------------------------------
/src/edit_distance.rs:
--------------------------------------------------------------------------------
//! Compute the edit distance between two strings

use std::cmp::min;

/// `find_edit_distance(str_a, str_b)` returns the edit distance between the two
/// strings. This edit distance is defined as being 1 point per insertion,
/// substitution, or deletion which must be made to make the strings equal.
///
/// I adapted this function from one I found in the
/// [TheAlgorithms/Rust repo on Github](https://github.com/TheAlgorithms/Rust/blob/master/src/dynamic_programming/edit_distance.rs).
///
/// Instead of storing the `m * n` matrix explicitly, only one row (of length `n`) is stored.
/// It keeps overwriting itself based on its previous values with the help of two scalars,
/// gradually reaching the last row. Then, the score is `matrix[n]`.
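///
/// For example (a brief illustrative sketch, assuming `edit_distance` is
/// exported as a public module of the `tidy` crate, as the test suite suggests):
/// ```
/// use tidy::edit_distance::find_edit_distance;
/// // "kitten" -> "sitting": substitute 'k'->'s', substitute 'e'->'i', insert 'g'
/// assert_eq!(find_edit_distance("kitten", "sitting"), 3);
/// assert_eq!(find_edit_distance("cat", "cat"), 0);
/// ```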
pub fn find_edit_distance(str_a: &str, str_b: &str) -> u32 {
    let (str_a, str_b) = (str_a.as_bytes(), str_b.as_bytes());
    let (m, n) = (str_a.len(), str_b.len());
    let mut distances: Vec<u32> = vec![0; n + 1]; // the dynamic programming matrix (only 1 row stored)
    let mut s: u32; // distances[i - 1][j - 1] or distances[i - 1][j]
    let mut c: u32; // distances[i][j - 1] or distances[i][j]
    let mut char_a: u8; // str_a[i - 1] the i-th character in str_a; only needs to be computed once per row
    let mut char_b: u8; // str_b[j - 1] the j-th character in str_b

    // 0th row
    for (j, v) in distances.iter_mut().enumerate().take(n + 1).skip(1) {
        *v = j as u32;
    }
    // rows 1 to m
    for i in 1..=m {
        s = (i - 1) as u32;
        c = i as u32;
        char_a = str_a[i - 1];
        for j in 1..=n {
            // c is distances[i][j-1] and s is distances[i-1][j-1] at the beginning of each round of iteration
            char_b = str_b[j - 1];
            c = min(
                s + if char_a == char_b { 0 } else { 1 },
                min(c + 1, distances[j] + 1),
            );
            // c is updated to distances[i][j], and will thus become distances[i][j-1] for the next cell
            s = distances[j]; // here distances[j] means distances[i-1][j] because it has not been overwritten yet
            // s is updated to distances[i-1][j], and will thus become distances[i-1][j-1] for the next cell
            distances[j] = c; // now distances[j] is updated to distances[i][j], and will thus become distances[i-1][j] for the next ROW
        }
    }

    distances[n]
}
--------------------------------------------------------------------------------
/tests/uniquely_decodable_tests.rs:
--------------------------------------------------------------------------------
mod uniquely_decodable_tests {
    use tidy::display_information::uniquely_decodable::is_uniquely_decodable;

    #[test]
    fn can_determine_a_list_with_prefix_words_is_not_uniquely_decodable() {
        let list: Vec<String> = vec!["news", "newspaper", "paper", "elephant"]
            .iter()
            .map(|x| x.to_string())
            .collect();

        assert!(!is_uniquely_decodable(&list));

        let list2: Vec<String> = vec![
            "spill".to_string(),
            "sun".to_string(),
            "moved".to_string(),
            "spills".to_string(),
            "unmoved".to_string(),
        ];
        assert!(!is_uniquely_decodable(&list2));
    }

    #[test]
    fn can_determine_that_a_list_is_uniquely_decodable() {
        let list: Vec<String> = vec![
            "excursion",
            "friday",
            "gyration",
            "natural",
            "pentagon",
            "sheath",
            "silver",
            "starless",
            "underling",
            "unmarked",
            "untaxed",
            "zippy",
        ]
        .iter()
        .map(|w| w.to_string())
        .collect();
        assert!(is_uniquely_decodable(&list));
    }

    #[test]
    fn can_determine_binary_code_with_a_suffix_code_is_not_uniquely_decodable() {
        let list: Vec<String> = vec!["02", "12", "120", "20", "21"]
            .iter()
            .map(|w| w.to_string())
            .collect();
        assert!(!is_uniquely_decodable(&list));
    }

    #[test]
    fn given_a_series_of_binary_codes_can_determine_which_are_uniquely_decodable() {
        let list: Vec<String> = vec!["0", "10", "110", "111"]
            .iter()
            .map(|w| w.to_string())
            .collect();
        assert!(is_uniquely_decodable(&list));

        let list: Vec<String> = vec!["0", "10", "010", "101"]
            .iter()
            .map(|w| w.to_string())
            .collect();
        assert!(!is_uniquely_decodable(&list));

        let list: Vec<String> = vec!["0", "01", "011", "0111"]
            .iter()
            .map(|w| w.to_string())
            .collect();
        assert!(is_uniquely_decodable(&list));

        // '0, 1, 00, 11' is not a uniquely decodable code
        let list: Vec<String> = vec!["0", "1", "00", "11"]
            .iter()
            .map(|w| w.to_string())
            .collect();
        assert!(!is_uniquely_decodable(&list));
    }

    #[test]
    fn knows_that_a_fixed_length_code_is_uniquely_decodable() {
        let list: Vec<String> = vec![
            "buoy", "cote", "dads", "duel", "gale", "life", "lurk", "peer", "rain", "tong",
        ]
        .iter()
        .map(|w| w.to_string())
        .collect();
        assert!(is_uniquely_decodable(&list));
    }
}
--------------------------------------------------------------------------------
/tests/list_information_tests.rs:
--------------------------------------------------------------------------------
mod list_information_tests {
    use tidy::display_information::*;
    // use tidy::*;

    #[test]
    fn can_calculate_entropy_per_word_of_generated_list() {
        assert_eq!(calc_entropy_per_word(7_776), 12.92481250360578);
        assert_eq!(calc_entropy_per_word(16_103), 13.975041868009528);
        assert_eq!(calc_entropy_per_word(18_318), 14.160974374927935);
    }

    #[test]
    fn can_calculate_assumed_entropy_per_character_of_generated_list() {
        let list: Vec<String> = vec!["to", "canopy", "cold", "seasons", "fire", "Christmas"]
            .iter()
            .map(|x| x.to_string())
            .collect();
        assert_eq!(assumed_entropy_per_character(&list), 1.292481250360578);
    }

    #[test]
    fn can_calculate_mean_edit_distance() {
        let list: Vec<String> = vec![
            "bat", "cat", "rat", "hat", "mat", "tat", "fat", "oat", "pat", "sat", "vat",
        ]
        .iter()
        .map(|x| x.to_string())
        .collect();
        assert_eq!(find_mean_edit_distance(&list), 1.0);

        let list2: Vec<String> = vec!["abcd", "abce", "abxz"]
            .iter()
            .map(|x| x.to_string())
            .collect();
        assert_eq!(find_mean_edit_distance(&list2), 1.6666666666666667);

        let list3: Vec<String> = vec!["abcd", "abce", "abxz", "abpt"]
            .iter()
            .map(|x| x.to_string())
            .collect();
        assert_eq!(find_mean_edit_distance(&list3), (11.0 / 6.0) as f64);
    }

    #[test]
    fn can_find_first_different_character() {
        assert_eq!(
            find_first_different_character_zero_indexed("apple", "zebra"),
            0
        );
        assert_eq!(
            find_first_different_character_zero_indexed("berry", "bicker"),
            1
        );
        assert_eq!(
            find_first_different_character_zero_indexed("hello", "help"),
            3
        );
        assert_eq!(
            find_first_different_character_zero_indexed("radius", "radical"),
            4
        );
        assert_eq!(
            find_first_different_character_zero_indexed("zip", "zippy"),
            3
        );
        assert_eq!(
            find_first_different_character_zero_indexed("zippy", "zip"),
            3
        );
    }

    #[test]
    fn can_find_longest_shared_prefix_in_a_list() {
        let list: Vec<String> = vec![
            "to",
            "canopy",
            "cold",
            "academia",
            "academic",
            "seasons",
            "fire",
            "Christmas",
        ]
        .iter()
        .map(|x| x.to_string())
        .collect();
        assert_eq!(find_longest_shared_prefix(&list, None), 7);

        let list: Vec<String> = vec!["to", "canopy", "cancel", "seasons", "fire", "Christmas"]
            .iter()
            .map(|x| x.to_string())
            .collect();
        assert_eq!(find_longest_shared_prefix(&list, None), 3);
    }
    #[test]
    fn can_get_shortest_word_length() {
        let list: Vec<String> = vec!["canopy", "to", "cold", "seasons", "fire", "Christmas"]
"Christmas"] 98 | .iter() 99 | .map(|x| x.to_string()) 100 | .collect(); 101 | assert_eq!(get_shortest_word_length(&list), 2); 102 | } 103 | #[test] 104 | fn can_get_mean_word_length() { 105 | let list: Vec = vec!["canopy", "to", "cold", "seasons", "fire", "Christmas"] 106 | .iter() 107 | .map(|x| x.to_string()) 108 | .collect(); 109 | assert_eq!(mean_word_length(&list), 5.3333335); 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # v0.3.17 2 | * Uses new version of `cargo-dist` (v0.28.2) to cut a fresh release, including binaries. 3 | 4 | # v0.3.16 5 | * Fixes error when parsing system language/locale! Use "-" rather than underscores ("\_")! 6 | 7 | # v0.3.15 8 | **WARNING:** Do NOT use this release. Has bug when parsing detected system language. 9 | * Upgrades `icu` crate dependency to latest version, v2.0. Completes #61. 10 | 11 | # v0.3.14 12 | * Upgrade to Rust edition 2024. 13 | * Reformats entire codebase. 14 | 15 | # v0.3.13 16 | 17 | * Upgrades rand and itertools dependencies ([#59](https://github.com/sts10/tidy/pull/59)) 18 | * Uses latest version of cargo-dist to create new release 19 | 20 | # v0.3.12 21 | 22 | * Upgrades some dependencies, including the version of cargo-dist used. 23 | 24 | # v0.3.10 25 | 26 | * Adds a new option to sort words by length (`--sort-by-length`). (Thanks to @latk for help with this work.) See [this blog post](https://sts10.github.io/2024/07/06/double-sorting.html) for more about this new feature. 27 | 28 | # v0.3.9 29 | 30 | * Update all dependencies that have new versions 31 | * Uses version 0.14.1 of cargo-dist to create release binaries and a shell installation script. 32 | 33 | # v0.3.8 34 | 35 | * Uses version 0.8.0 of cargo-dist to create release binaries and a shell installation script. 36 | # v0.3.8 37 | 38 | * Uses version 0.8.0 of cargo-dist to create release binaries and a shell installation script. 39 | 40 | # v0.3.7 41 | 42 | First release using [cargo-dist](https://opensource.axo.dev/cargo-dist/). Should create binaries for Mac and Windows users. Cool! 43 | 44 | # v0.3.0 45 | 46 | The big new feature in this release is that users can optionally print attributes and word samples in JSON format. 47 | 48 | ## Changes 49 | * d06d1ea - Uses an enum for result of Kraft-McMillan Inequality 50 | * abe465d - only calculates longest word length once, in order to be more efficient 51 | * a979645 - brings help text up to date with JSON feature 52 | * fdf4071 - print word samples within JSON output 53 | * dad0cd6 - gives credit back to Kraft! 54 | * f77ec28 - more concise creation of `ListAttributes` object. Also think I made the shared prefix calculation a bit faster 55 | * 8549df7 - make shared prefix optional, since it takes a while 56 | * 95d72b6 - improves the descriptiveness of a function name 57 | * 4fed268 - fixes spelling of 'unique' in new display attributes code 58 | * b07f7dc - puts `ListAttributes` into a new enum, adds feature of printing list attributes in JSON 59 | 60 | # v0.2.91 61 | 62 | Mostly housekeeping in this release. 

* 0a6a78b - moves Shannon line boolean attribute behind 5 As rather than 4, since it's a pretty dubious attribute at this point
* 67ab0ca - adds link to NSA's password generator and its word list
* d3f3549 - fixes mistake in explanation of unique decodability in readme
* dc4828e - adds some metadata to Cargo.toml for thoroughness
* 80181b0 - adds upgrade and uninstall information to the readme
* 84bf97a - updates word sample language in readme

# v0.2.90

The big change in this release is that Tidy now performs Schlinkert pruning both on the list as given _and_ on the list where every word is reversed.

Performing the Schlinkert prune on the reversed words is equivalent to using prefix words in the Sardinas-Patterson algorithm, rather than suffix words. Tidy now tries both, preferring whichever process saves more words from the original list. This is the case on the BIP39 English word list. See #43 for more information.

## Commits with major changes
* 1de5d1c - adds a test to make sure Tidy runs Schlinkert pruning the reversed list
* be38459 - when reversing words before doing the Schlinkert prune, use graphemes rather than characters to better attempt to handle accented characters and emoji
* 8ac7782 - executes Schlinkert prune in both directions, then prefer whichever saves the most words
* d681136 - Adds deny.toml to ease compatibility checks
* 24063ce - doesn't print a space after 6th word of each sample

Also various function and variable renaming for clarity and, as usual, other updates to the README.
--------------------------------------------------------------------------------
/src/file_writer.rs:
--------------------------------------------------------------------------------
use crate::cards::print_as_cards;
use crate::dice::print_as_dice;
use crate::display_information::display_list_information;
use std::fs::File;
use std::io::Write;
use std::path::PathBuf;

#[derive(Default, Debug, Clone)]
pub struct PrintRequest {
    pub tidied_list: Vec<String>,
    pub dry_run: bool,
    pub quiet: bool,
    pub output: Option<PathBuf>,
    pub dice_sides: Option<u8>,
    pub cards: bool,
    pub print_dice_sides_as_their_base: bool,
    pub attributes: u8,
    pub attributes_as_json: bool,
    pub samples: bool,
    pub ignore_before_delimiter: Option<char>,
    pub ignore_after_delimiter: Option<char>,
}

/// Print to terminal or file
pub fn print_list(print_req: PrintRequest) {
    if !print_req.quiet {
        if print_req.tidied_list.is_empty() {
            eprintln!(
                "WARNING: All words removed (tidied list is empty). Check inputted list and given options."
            );
        } else if !print_req.dry_run {
            eprintln!("Printing new list...");
        }
    }
    if !print_req.dry_run {
        match print_req.output {
            Some(output) => {
                // Print to file
                print_list_to_file(
                    &print_req.tidied_list,
                    output,
                    print_req.cards,
                    print_req.dice_sides,
                    print_req.print_dice_sides_as_their_base,
                );
            }
            // If no output file destination, print resulting list, word by word,
            // to println (which goes to stdout, allowing use of > on command line)
            None => {
                for (i, word) in print_req.tidied_list.iter().enumerate() {
                    if let Some(dice_sides) = print_req.dice_sides {
                        print!(
                            "{:}\t",
                            print_as_dice(
                                i,
                                dice_sides,
                                print_req.tidied_list.len(),
                                print_req.print_dice_sides_as_their_base
                            )
                        );
                    } else if print_req.cards {
                        print!("{:}\t", print_as_cards(i, print_req.tidied_list.len()));
                    }
                    println!("{}", word);
                }
            }
        }
    }
    if !print_req.quiet {
        if !print_req.dry_run && !print_req.tidied_list.is_empty() {
            eprintln!("\nDone making list.");
        } else if print_req.dry_run {
            eprintln!("Dry run complete");
        }
        if print_req.attributes > 0 || print_req.samples {
            display_list_information(
                &print_req.tidied_list,
                print_req.attributes,
                print_req.attributes_as_json,
                print_req.ignore_after_delimiter,
                print_req.ignore_before_delimiter,
                print_req.samples,
            );
        }
    }
}

fn print_list_to_file(
    tidied_list: &[String],
    output: PathBuf,
    cards: bool,
    dice_sides: Option<u8>,
    print_dice_sides_as_their_base: bool,
) {
    let mut f = File::create(output).expect("Unable to create file");
    for (i, word) in tidied_list.iter().enumerate() {
        // If user set a number of dice_sides, we'll add the appropriate
        // dice roll information, then a tab, then the word.
        if let Some(dice_sides) = dice_sides {
            write!(
                f,
                "{}\t",
                print_as_dice(
                    i,
                    dice_sides,
                    tidied_list.len(),
                    print_dice_sides_as_their_base
                ),
            )
            .expect("Unable to write dice roll to file");
        } else if cards {
            write!(f, "{}\t", print_as_cards(i, tidied_list.len()))
                .expect("Unable to write corresponding card to file");
        }

        writeln!(f, "{}", word).expect("Unable to write word to file");
    }
}
--------------------------------------------------------------------------------
/src/parsers.rs:
--------------------------------------------------------------------------------
/// Parse user's input for a handful of options, either directly as a `usize`
/// or, if they entered Python exponent notation (`base**exponent`), by
/// evaluating the exponent. Either way, return a `Result` containing a
/// `usize` or an error `String`.
///
/// This is useful when making lists fit to a specific amount of dice and
/// dice sides. (As an example, five rolls of a six-sided dice would be: 6**5.)
pub fn eval_list_length(input: &str) -> Result<usize, String> {
    match input.split("**").collect::<Vec<&str>>().as_slice() {
        [] => Err("Please specify a number.".to_string()),
        [num_string] => num_string.parse::<usize>().map_err(|_| {
            format!(
                "Unable to parse input {}. Enter a number or a base**exponent",
                input
            )
        }),
        [base_string, exponent_string] => {
            let base: usize = base_string
                .parse::<usize>()
                .map_err(|_| format!("Unable to parse input {}. Positive integers only.", input))?;
            let exponent: u32 = exponent_string
                .parse::<u32>()
                .map_err(|_| format!("Unable to parse input {}. Positive integers only.", input))?;
            Ok(base.pow(exponent))
        }
        _ => Err("You can only specify one exponent! Use format: base**exponent".to_string()),
    }
}

use crate::TidyRequest;
use crate::split_and_vectorize;
pub fn parse_whittle_options(
    mut this_tidy_request: TidyRequest,
    whittle_to_s: Option<String>,
) -> Result<(TidyRequest, Option<usize>, Option<usize>), String> {
    match whittle_to_s {
        Some(whittle_to_string) => {
            // Some whittle_to String has been provided, which we need to do a lot of work for
            // First, parse length_to_whittle_to
            let length_to_whittle_to =
                eval_list_length(split_and_vectorize(&whittle_to_string, ",")[0]).unwrap();
            // Determine initial starting point
            let starting_point = if split_and_vectorize(&whittle_to_string, ",").len() == 2 {
                // If user gave us one, use that.
                split_and_vectorize(&whittle_to_string, ",")[1]
                    .parse::<usize>()
                    .unwrap_or((length_to_whittle_to as f64 * 1.4) as usize)
            } else {
                // If not, start with length_to_whittle_to * 1.4 as a decent opening guess.
                // Effectively this assumes we'll cut about 40% of words in most
                // Tidy runs.
                (length_to_whittle_to as f64 * 1.4) as usize
            };
            // It's possible that our derived starting_point is higher than the length
            // of our inputted_word_list. If that's the case, reset starting_point
            // to that length.
            let starting_point = if starting_point > this_tidy_request.list.len() {
                this_tidy_request.list.len()
            } else {
                // If not, we're good. Let the given starting_point pass through.
                starting_point
            };

            // Another potential issue: User is asking for too many words, given the length
            // of the inputted_word_list (which would be a problem!)
            if length_to_whittle_to > this_tidy_request.list.len() {
                let error_msg = format!(
                    "ERROR: Cannot make a list of {} words from the inputted list(s), given the selected options. Please try again, either by changing options or inputting more words.",
                    length_to_whittle_to
                );
                return Err(error_msg);
            }

            // Give user a heads up that we're working on it.
            eprintln!(
                "Whittling list to {} words. This may take a moment...",
                length_to_whittle_to
            );

            // When whittling, confidently overwrite a few request parameters
            this_tidy_request.take_first = Some(starting_point);
            this_tidy_request.take_rand = None;
            this_tidy_request.print_rand = None;
            this_tidy_request.print_first = None;

            Ok((
                this_tidy_request,
                Some(length_to_whittle_to),
                Some(starting_point),
            ))
        }
        None => Ok((this_tidy_request, None, None)),
    }
}
--------------------------------------------------------------------------------
/src/dice.rs:
--------------------------------------------------------------------------------
use radix_fmt::*; // https://stackoverflow.com/a/50278316
/// Print dice rolls before each corresponding word. Note
/// that the `n` parameter should be zero-indexed. A tab (`\t`)
/// is printed between the dice roll and the word.
///
/// The `base` parameter represents the number of sides of the
/// dice, which can be set from 2 to 36.
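/// When `use_letters` is false, bases 10 through 36 print each digit
/// through the 1-indexed letter-to-number mapping in `char_to_digit` below.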
///
/// Here's an example of an outputted word list with base 6:
/// ```text
/// 11111 aback
/// 11112 abandons
/// 11113 abated
/// 11114 abbey
/// 11115 abbot
/// 11116 abbreviated
/// 11121 abdomen
/// 11122 abducted
/// 11123 aberrant
/// 11124 abide
/// 11125 ability
/// 11126 abject
/// 11131 abnormally
/// // etc.
/// ```
///
/// If this base is between 2 and 9,
/// this function assumes the user will be using actual dice, which are indexed at 1.
/// Thus, `if 2 <= base && base <= 9`, we add `1` to each digit of the dice
/// roll before printing it.
///
/// I wish I could replicate this radix function easily without the dependency,
/// but that doesn't seem [very easy](https://stackoverflow.com/a/50278316).
pub fn print_as_dice(n: usize, base: u8, list_length: usize, use_letters: bool) -> String {
    // Set width for zero-padding

    // First, get the literal width of the largest number we'll be printing.
    // This is, by definition, the length of the list.
    // We want the length of the number in the base we want to print all
    // the numbers in, so use the radix function.
    let n_as_base = radix(n, base);

    // Pad dice roll numbers with zeros
    let n_width = n_as_base.to_string().len();
    let pad_width = radix(list_length - 1, base).to_string().len();

    let mut padded_n = String::new();
    for _i in n_width..pad_width {
        padded_n.push('0');
    }
    // Now that we have the appropriate number of zeros
    // in `padded_n`, it's time to add our number
    padded_n += &n_as_base.to_string();

    // Print the dice rolls in slightly different ways,
    // depending on the value of the base.
    if use_letters {
        // We'll use zero-indexed values if sides_as_letters is
        // selected
        match base {
            // Values of 0 and 1 should have been caught earlier,
            // so we'll panic! if we have them here
            0 | 1 => panic!("Too few dice sides entered"),
            2..=36 => padded_n
                .chars()
                .map(|ch| ch.to_string().to_uppercase())
                .collect::<String>()
                .trim()
                .to_string(),
            _ => panic!("Amount of dice sides received is too high"),
        }
    } else {
        // We'll use 1-indexed values if sides_as_letters is NOT
        // selected
        match base {
            0 | 1 => panic!("Too few dice sides entered"),
            2..=9 => padded_n
                .chars()
                .map(|ch| (ch.to_string().parse::<u8>().unwrap() + 1).to_string())
                .collect::<String>(),
            10..=36 => padded_n
                .chars()
                .map(|ch| char_to_digit(ch) + "-")
                .collect::<String>()
                .trim_end_matches('-')
                .trim()
                .to_string(),
            _ => panic!("Amount of dice sides received is too high"),
        }
    }
}

/// Convert a 0-z inputted character to a 1-indexed, padded string ("01" to "36")
fn char_to_digit(ch: char) -> String {
    match ch {
        '0' => "01",
        '1' => "02",
        '2' => "03",
        '3' => "04",
        '4' => "05",
        '5' => "06",
        '6' => "07",
        '7' => "08",
        '8' => "09",
        '9' => "10",
        'a' => "11",
        'b' => "12",
        'c' => "13",
        'd' => "14",
        'e' => "15",
        'f' => "16",
        'g' => "17",
        'h' => "18",
        'i' => "19",
        'j' => "20",
        'k' => "21",
        'l' => "22",
        'm' => "23",
        'n' => "24",
        'o' => "25",
        'p' => "26",
        'q' => "27",
        'r' => "28",
        's' => "29",
        't' => "30",
        'u' => "31",
        'v' => "32",
        'w' => "33",
        'x' => "34",
        'y' => "35",
        'z' => "36",
        _ => panic!("Unable to convert this dice number from a letter to a number."),
    }
    .to_string()
}
--------------------------------------------------------------------------------
/src/input_validations.rs:
--------------------------------------------------------------------------------
pub fn validate_dice_sides(dice_sides: Option<u8>) -> Result<(), &'static str> {
    if let Some(dice_sides) = dice_sides {
        if !(2 <= dice_sides && dice_sides <= 36) {
            return Err("Error: Specified number of dice sides must be between 2 and 36.");
        }
    }
    Ok(())
}

pub fn validate_list_truncation_options(
    whittle_to: &Option<String>,
    cut_to: Option<usize>,
    take_first: Option<usize>,
    take_rand: Option<usize>,
) -> Result<(), &'static str> {
    // Check for invalid whittle_to requests
    if whittle_to.is_some() && cut_to.is_some() {
        Err(
            "Error: Can not specify BOTH a 'cut to' and 'whittle to' option. Please only use one of these two.",
        )
    } else if whittle_to.is_some() && (take_first.is_some() || take_rand.is_some()) {
        Err(
            "Error: Can not specify BOTH a 'whittle to' amount and a 'take first' or 'take rand' amount. Please only specify a whittle-to amount or a take amount.",
        )
    } else {
        Ok(())
    }
}

use crate::TidyRequest;
pub fn validate_and_parse_ignore_options(
    this_tidy_request: &TidyRequest,
    dice_sides: Option<u8>,
    print_dice_sides_as_their_base: bool,
) -> Result<(Option<char>, Option<char>), &'static str> {
    // Warn about the (many!) current limitations of the 'ignore' options
    match (
        this_tidy_request.ignore_after_delimiter,
        this_tidy_request.ignore_before_delimiter,
    ) {
        // If given both an after_delimiter and a before_delimiter, error out nicely.
        (Some(_after_delimiter), Some(_before_delimiter)) => {
            let err_message = "Can't ignore metadata on both sides.";
            Err(err_message)
        }
        // No ignore delimiters given, so just return None to both
        // variables.
        (None, None) => Ok((None, None)),
        // An after_delimiter given, but not a before_delimiter
        (Some(after_delimiter), None) => {
            if this_tidy_request.to_lowercase
                || this_tidy_request.should_straighten_quotes
                || this_tidy_request.should_remove_prefix_words
                || this_tidy_request.should_remove_suffix_words
                || this_tidy_request.should_schlinkert_prune
                || this_tidy_request.should_delete_nonalphanumeric
                || this_tidy_request.should_delete_integers
                || this_tidy_request
                    .should_delete_before_first_delimiter
                    .is_some()
                || this_tidy_request
                    .should_delete_after_first_delimiter
                    .is_some()
                || this_tidy_request.minimum_edit_distance.is_some()
                || this_tidy_request.maximum_shared_prefix_length.is_some()
                || this_tidy_request.homophones_list.is_some()
                || dice_sides.is_some()
                || print_dice_sides_as_their_base
            {
                let err_message = "--ignore-after option does not work with one of the other options you selected. Please change options. Exiting";
                Err(err_message)
            } else {
                Ok((Some(after_delimiter), None))
            }
        }
        // No after_delimiter given, but a before_delimiter has been given
        (None, Some(before_delimiter)) => {
            if this_tidy_request.to_lowercase
                || this_tidy_request.should_straighten_quotes
                || this_tidy_request.should_remove_prefix_words
                || this_tidy_request.should_remove_suffix_words
                || this_tidy_request.should_schlinkert_prune
                || this_tidy_request.should_delete_nonalphanumeric
                || this_tidy_request.should_delete_integers
                || this_tidy_request
                    .should_delete_before_first_delimiter
                    .is_some()
                || this_tidy_request
                    .should_delete_after_first_delimiter
                    .is_some()
                || this_tidy_request.minimum_edit_distance.is_some()
                || this_tidy_request.maximum_shared_prefix_length.is_some()
                || this_tidy_request.homophones_list.is_some()
                || dice_sides.is_some()
                || print_dice_sides_as_their_base
            {
                let err_message = "--ignore-before option does not work with one of the other options you selected. Please change options. Exiting";
                Err(err_message)
            } else {
                Ok((None, Some(before_delimiter)))
            }
        }
    }
}
--------------------------------------------------------------------------------
/src/file_readers.rs:
--------------------------------------------------------------------------------
use crate::split_and_vectorize;
use std::fs::File;
use std::io::BufRead;
use std::io::BufReader;
use std::path::PathBuf;

/// Takes a slice of `PathBuf`s representing the word list(s)
/// that the user has inputted to the program. Then iterates
/// through each file and adds each line to a `Vec<String>`. (Blank
/// lines and duplicate lines will be handled elsewhere.)
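///
/// A usage sketch (the file paths here are hypothetical):
/// ```no_run
/// use std::path::PathBuf;
/// use tidy::file_readers::make_vec_from_filenames;
///
/// // Read two lists, skipping the first line of each file.
/// let words = make_vec_from_filenames(
///     &[PathBuf::from("list_a.txt"), PathBuf::from("list_b.txt")],
///     Some(1),
///     None,
/// );
/// ```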
pub fn make_vec_from_filenames(
    filenames: &[PathBuf],
    skip_rows_start: Option<usize>,
    skip_rows_end: Option<usize>,
) -> Vec<String> {
    let mut word_lists_by_file: Vec<Vec<String>> = [].to_vec();
    for filename in filenames {
        let f = match File::open(filename) {
            Ok(file) => file,
            Err(e) => panic!("Error opening file {:?}: {}", filename, e),
        };
        let file = BufReader::new(&f);
        let mut raw_lines = vec![];
        for line in file.lines() {
            let l = match line {
                Ok(l) => l,
                Err(e) => {
                    eprintln!(
                        "Error reading a line from file {:?}: {}\nWill continue reading file.",
                        filename, e
                    );
                    continue;
                }
            };
            raw_lines.push(l);
        }
        let size_of_raw_lines = raw_lines.len();
        let mut word_list_from_this_file = [].to_vec();
        for (line_number, line) in raw_lines.into_iter().enumerate() {
            match (skip_rows_start, skip_rows_end) {
                (Some(skip_rows_start), Some(skip_rows_end)) => {
                    if line_number >= skip_rows_start
                        && line_number < size_of_raw_lines - skip_rows_end
                    {
                        word_list_from_this_file.push(line);
                    }
                }
                (Some(skip_rows_start), None) => {
                    if line_number >= skip_rows_start {
                        word_list_from_this_file.push(line);
                    }
                }
                (None, Some(skip_rows_end)) => {
                    if line_number < size_of_raw_lines - skip_rows_end {
                        word_list_from_this_file.push(line);
                    }
                }
                (None, None) => word_list_from_this_file.push(line),
            }
        }
        word_lists_by_file.push(word_list_from_this_file);
    }
    // Finally, "blend" words into one Vec
    blend(&word_lists_by_file)
}

/// "Blend" words together one at a time, like dealing cards in reverse
pub fn blend(word_lists_by_file: &[Vec<String>]) -> Vec<String> {
    let mut size_of_longest_vector = 0;
    for word_list in word_lists_by_file {
        if size_of_longest_vector < word_list.len() {
            size_of_longest_vector = word_list.len();
        }
    }
    // "Blend" words together one at a time, like dealing cards in reverse
    let mut blended = [].to_vec();
    for i in 0..size_of_longest_vector {
        for list in word_lists_by_file {
            if list.len() > i {
                // Dunno how to not call clone here...
                blended.push(list[i].clone());
            }
        }
    }
    blended
}

/// Like `make_vec_from_filenames`, this function takes a slice of `PathBuf`s of
/// files. But in this case these files represent lists of homophones that the
/// user wants to make sure aren't both on the resulting list.
///
/// These homophone files are expected to be formatted such that each line of the file
/// is `homophone1,homophone2`.
///
/// This function produces a Vector of tuples of strings, representing the
/// homophone pairs.
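///
/// A usage sketch (the filename is hypothetical):
/// ```no_run
/// use std::path::PathBuf;
/// use tidy::file_readers::read_homophones_list_from_filenames;
///
/// // A file with a line like "sun,son" yields the pair
/// // ("sun".to_string(), "son".to_string()).
/// let pairs = read_homophones_list_from_filenames(&[PathBuf::from("homophones.txt")]);
/// ```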
pub fn read_homophones_list_from_filenames(filenames: &[PathBuf]) -> Vec<(String, String)> {
    let mut homophones_list: Vec<(String, String)> = vec![];
    for filename in filenames {
        let f = match File::open(filename) {
            Ok(file) => file,
            Err(e) => panic!("Error opening file {:?}: {}", filename, e),
        };
        let file = BufReader::new(&f);
        for line in file.lines() {
            let l = match line {
                Ok(l) => l,
                Err(e) => {
                    eprintln!(
                        "Error reading a line from file {:?}: {}\nWill continue reading file.",
                        filename, e
                    );
                    continue;
                }
            };
            let pair: (String, String) = (
                split_and_vectorize(&l, ",")[0].trim().to_string(),
                split_and_vectorize(&l, ",")[1].trim().to_string(),
            );
            homophones_list.push(pair);
        }
    }
    homophones_list
}
--------------------------------------------------------------------------------
/wordlists-to-tidy.markdown:
--------------------------------------------------------------------------------
# Where can I find some word lists?

## Diceware lists (generally 7,776 words long)
- The [Electronic Frontier Foundation](https://www.eff.org/) has published [a few word lists for creating diceware passphrases](https://www.eff.org/deeplinks/2016/07/new-wordlists-random-passphrases).
- I'm pretty sure the password manager Bitwarden uses [the EFF long list](https://www.eff.org/files/2016/07/18/eff_large_wordlist.txt).
- [KeePassXC](https://keepassxc.org/) uses [the EFF long list with some minor modifications](https://github.com/keepassxreboot/keepassxc/blob/develop/share/wordlists/eff_large.wordlist).
- Note: These lists often have a tab between the dice numbers and each word. Tidy can delete the dice numbers easily with something like `tidy -D t -o clean_eff.txt eff_large_wordlist.txt` or using the `-i` flag.
- [ulif's "diceware"](https://github.com/ulif/diceware) seems to have collected [a few word lists](https://github.com/ulif/diceware/tree/master/diceware/wordlists) in its Github repo, along with [a separate page that explains each of the lists](https://github.com/ulif/diceware/blob/master/docs/wordlists.rst).
- [dmuth's "diceware" program](https://github.com/dmuth/diceware) has a [collection of lists](https://github.com/dmuth/diceware/tree/master/wordlist) (h/t [atoponce](https://www.reddit.com/r/Passwords/comments/sqrymt/comment/hwnfb94/))
- The [original "Reinhold" diceware list](https://theworld.com/%7Ereinhold/diceware.wordlist.asc), created by [Arnold Reinhold](https://theworld.com/~reinhold/), though it has some issues.
- Arnold Reinhold hosts [diceware lists in a variety of languages](https://theworld.com/~reinhold/diceware.html#Diceware%20in%20Other%20Languages|outline).

## Short word lists
- [Bitcoin BIP-0039](https://github.com/bitcoin/bips/tree/master/bip-0039) (2,048 words) (h/t [atoponce](https://www.reddit.com/r/Passwords/comments/sqrymt/comment/hwnfb94/))
- [Monero's word list](https://github.com/monero-project/monero/blob/master/src/mnemonics/english.h) (1,626 words) (h/t [atoponce](https://www.reddit.com/r/Passwords/comments/sqrymt/comment/hwnfb94/))
- [Mnemonicode](https://github.com/schollz/mnemonicode/blob/master/word_list.go) is another word list optimized for pronunciation. I believe [croc](https://github.com/schollz/croc), a file transferring tool, uses it.
- [Magic Wormhole](https://github.com/magic-wormhole/magic-wormhole/), a tool for transferring files, uses [a version of the PGP Word List](https://github.com/magic-wormhole/magic-wormhole/blob/master/src/wormhole/_wordlist.py), which specifically tries to use pairs of words that are phonetically distinct.
- [Wagashi](https://codeberg.org/azukitofu/wagashi) is a set of new, short word lists in English. They also offer emoji lists.
- [Session's English word list](https://github.com/oxen-io/session-desktop/blob/unstable/mnemonic_languages/english.json)
- [simple1024](https://github.com/pera/simple1024) is a word list of 1,024 common English words, an alternative to the EFF's short word lists.

## Pretty long word lists
- If you're using Linux or MacOS, you've likely got some long lists on your computer. Check `/usr/share/dict/words` or `/usr/share/dict/american-english`.
- [NSA's RandPassGenerator](https://github.com/nsacyber/RandPassGenerator) uses [a massive 117,828-word list](https://github.com/nsacyber/RandPassGenerator/blob/master/RandPassGenerator/data/wordlist.txt).
- [Niceware list](https://github.com/diracdeltas/niceware/blob/master/lib/wordlist.js) (~65,000 words). [I used Tidy to help create v4.0.0 of this list](https://github.com/diracdeltas/niceware/pull/52)!
- [Norvig Natural Language Corpus Data](https://norvig.com/ngrams/) has [a list of 333,000 commonly used words](https://norvig.com/ngrams/count_1w.txt) from the Google Web Trillion Word Corpus, as well as an assortment of other word lists.
- [British National Corpus (BNC) database and word frequency lists](https://www.kilgarriff.co.uk/bnc-readme.html)
- [Lists used by a program called webpassgen](https://github.com/atoponce/webpassgen/tree/master/lists)
- [SCOWL (Spell Checker Oriented Word Lists) and Friends](http://wordlist.aspell.net/) is "a database of information on English words useful for creating high-quality word lists suitable for use in spell checkers of most dialects of English."
- [ENABLE2K](https://web.archive.org/web/20090122025747/http://personal.riverusers.com/~thegrendel/software.html) seems to be an older version of the SCOWL project?

## Collections of word lists
- A collection of a few [Public Domain Word Lists](https://github.com/MichaelWehar/Public-Domain-Word-Lists)
- [**A great list of word lists** by Aaron Toponce](https://gist.github.com/atoponce/95c4f36f2bc12ec13242a3ccc55023af).
- [A list of word lists](http://www.webplaces.com/passwords/passphrase-word-lists.htm).
- [Danish wordlists](https://github.com/n0kovo/danish-wordlists) is a "collection of [Danish] wordlists for cracking danish passwords"
- [r/wordlists subreddit](https://www.reddit.com/r/wordlists/), which seems to have links to a few non-English word lists.
- You can also scan [GitHub's #wordlists topic](https://github.com/topics/wordlists)
- An XKCD-inspired passphrase generator with [a collection of non-English word lists](https://github.com/redacted/XKCD-password-generator/tree/master/xkcdpass/static).

## Various
- The EFF also has some [fandom-inspired lists](https://www.eff.org/deeplinks/2018/08/dragon-con-diceware). They use a space between dice numbers and words, so Tidy can clean up with the `-D s` option. I prefer [Aaron Toponce's proposed _new_ fandom word lists](https://github.com/sts10/new-fandom-wordlists).
- I'm pretty sure this is [1Password](https://1password.com/)'s [word list](https://1password.com/txt/agwordlist.txt) as of 2021.
- 1Password published a few slightly different word lists ([one](https://github.com/1Password/spg/blob/master/testdata/agwordlist.txt), [two](https://github.com/agilebits/crackme/blob/master/doc/AgileWords.txt)) in 2018.
- [SecureDrop](https://github.com/freedomofpress/securedrop/) has a few lists, including one of [adjectives](https://github.com/freedomofpress/securedrop/blob/develop/securedrop/dictionaries/adjectives.txt) and one of [nouns](https://github.com/freedomofpress/securedrop/blob/develop/securedrop/dictionaries/nouns.txt).
- [Jitsi](https://meet.jit.si/) has [lists of nouns, verbs, adjectives, and adverbs](https://github.com/jitsi/js-utils/blob/1c57316514a602f3888f4aafb047e8288066186e/random/roomNameGenerator.js) for generating random room names.
- [A German word list that looks promising](https://github.com/martinhoefling/goxkcdpwgen/blob/master/wordlists/de-7776-v1-diceware.txt)

## Shameless plug

- I used Tidy to create [the Orchard Street Wordlists](https://github.com/sts10/orchard-street-wordlists) ([as well as a few other word lists](https://github.com/sts10/generated-wordlists)).
--------------------------------------------------------------------------------
/deny.toml:
--------------------------------------------------------------------------------
# This template contains all of the possible sections and their default values

# Note that all fields that take a lint level have these possible values:
# * deny - An error will be produced and the check will fail
# * warn - A warning will be produced, but the check will not fail
# * allow - No warning or error will be produced, though in some cases a note
# will be

# The values provided in this template are the default values that will be used
# when any section or field is not specified in your own configuration

# Root options

# The graph table configures how the dependency graph is constructed and thus
# which crates the checks are performed against
[graph]
# If 1 or more target triples (and optionally, target_features) are specified,
# only the specified targets will be checked when running `cargo deny check`.
# This means, if a particular package is only ever used as a target specific
# dependency, such as, for example, the `nix` crate only being used via the
# `target_family = "unix"` configuration, that only having windows targets in
# this list would mean the nix crate, as well as any of its exclusive
# dependencies not shared by any other crates, would be ignored, as the target
# list here is effectively saying which targets you are building for.
targets = [
    # The triple can be any string, but only the target triples built in to
    # rustc (as of 1.40) can be checked against actual config expressions
    #"x86_64-unknown-linux-musl",
    # You can also specify which target_features you promise are enabled for a
    # particular target. target_features are currently not validated against
    # the actual valid features supported by the target architecture.
32 | #{ triple = "wasm32-unknown-unknown", features = ["atomics"] }, 33 | ] 34 | # When creating the dependency graph used as the source of truth when checks are 35 | # executed, this field can be used to prune crates from the graph, removing them 36 | # from the view of cargo-deny. This is an extremely heavy hammer, as if a crate 37 | # is pruned from the graph, all of its dependencies will also be pruned unless 38 | # they are connected to another crate in the graph that hasn't been pruned, 39 | # so it should be used with care. The identifiers are [Package ID Specifications] 40 | # (https://doc.rust-lang.org/cargo/reference/pkgid-spec.html) 41 | #exclude = [] 42 | # If true, metadata will be collected with `--all-features`. Note that this can't 43 | # be toggled off if true, if you want to conditionally enable `--all-features` it 44 | # is recommended to pass `--all-features` on the cmd line instead 45 | all-features = false 46 | # If true, metadata will be collected with `--no-default-features`. The same 47 | # caveat with `all-features` applies 48 | no-default-features = false 49 | # If set, these feature will be enabled when collecting metadata. If `--features` 50 | # is specified on the cmd line they will take precedence over this option. 51 | #features = [] 52 | 53 | # The output table provides options for how/if diagnostics are outputted 54 | [output] 55 | # When outputting inclusion graphs in diagnostics that include features, this 56 | # option can be used to specify the depth at which feature edges will be added. 57 | # This option is included since the graphs can be quite large and the addition 58 | # of features from the crate(s) to all of the graph roots can be far too verbose. 59 | # This option can be overridden via `--feature-depth` on the cmd line 60 | feature-depth = 1 61 | 62 | # This section is considered when running `cargo deny check advisories` 63 | # More documentation for the advisories section can be found here: 64 | # https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html 65 | [advisories] 66 | # The path where the advisory databases are cloned/fetched into 67 | #db-path = "$CARGO_HOME/advisory-dbs" 68 | # The url(s) of the advisory databases to use 69 | #db-urls = ["https://github.com/rustsec/advisory-db"] 70 | # A list of advisory IDs to ignore. Note that ignored advisories will still 71 | # output a note when they are encountered. 72 | ignore = [ 73 | #"RUSTSEC-0000-0000", 74 | #{ id = "RUSTSEC-0000-0000", reason = "you can specify a reason the advisory is ignored" }, 75 | #"a-crate-that-is-yanked@0.1.1", # you can also ignore yanked crate versions if you wish 76 | #{ crate = "a-crate-that-is-yanked@0.1.1", reason = "you can specify why you are ignoring the yanked crate" }, 77 | ] 78 | # If this is true, then cargo deny will use the git executable to fetch advisory database. 79 | # If this is false, then it uses a built-in git library. 80 | # Setting this to true can be helpful if you have special authentication requirements that cargo-deny does not support. 81 | # See Git Authentication for more information about setting up git authentication. 
82 | #git-fetch-with-cli = true 83 | 84 | # This section is considered when running `cargo deny check licenses` 85 | # More documentation for the licenses section can be found here: 86 | # https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html 87 | [licenses] 88 | # List of explicitly allowed licenses 89 | # See https://spdx.org/licenses/ for list of possible licenses 90 | # [possible values: any SPDX 3.11 short identifier (+ optional exception)]. 91 | allow = [ 92 | "Apache-2.0", 93 | "MIT", 94 | "Unicode-DFS-2016", 95 | "Unicode-3.0", 96 | "Unlicense" 97 | ] 98 | # The confidence threshold for detecting a license from license text. 99 | # The higher the value, the more closely the license text must be to the 100 | # canonical license text of a valid SPDX license file. 101 | # [possible values: any between 0.0 and 1.0]. 102 | confidence-threshold = 0.8 103 | # Allow 1 or more licenses on a per-crate basis, so that particular licenses 104 | # aren't accepted for every possible crate as with the normal allow list 105 | exceptions = [ 106 | # Each entry is the crate and version constraint, and its specific allow 107 | # list 108 | #{ allow = ["Zlib"], crate = "adler32" }, 109 | ] 110 | 111 | # Some crates don't have (easily) machine readable licensing information, 112 | # adding a clarification entry for it allows you to manually specify the 113 | # licensing information 114 | #[[licenses.clarify]] 115 | # The package spec the clarification applies to 116 | #crate = "ring" 117 | # The SPDX expression for the license requirements of the crate 118 | #expression = "MIT AND ISC AND OpenSSL" 119 | # One or more files in the crate's source used as the "source of truth" for 120 | # the license expression. If the contents match, the clarification will be used 121 | # when running the license check, otherwise the clarification will be ignored 122 | # and the crate will be checked normally, which may produce warnings or errors 123 | # depending on the rest of your configuration 124 | #license-files = [ 125 | # Each entry is a crate relative path, and the (opaque) hash of its contents 126 | #{ path = "LICENSE", hash = 0xbd0eed23 } 127 | #] 128 | 129 | [licenses.private] 130 | # If true, ignores workspace crates that aren't published, or are only 131 | # published to private registries. 132 | # To see how to mark a crate as unpublished (to the official registry), 133 | # visit https://doc.rust-lang.org/cargo/reference/manifest.html#the-publish-field. 134 | ignore = false 135 | # One or more private registries that you might publish crates to, if a crate 136 | # is only published to private registries, and ignore is true, the crate will 137 | # not have its license(s) checked 138 | registries = [ 139 | #"https://sekretz.com/registry 140 | ] 141 | 142 | # This section is considered when running `cargo deny check bans`. 
143 | # More documentation about the 'bans' section can be found here: 144 | # https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html 145 | [bans] 146 | # Lint level for when multiple versions of the same crate are detected 147 | multiple-versions = "warn" 148 | # Lint level for when a crate version requirement is `*` 149 | wildcards = "allow" 150 | # The graph highlighting used when creating dotgraphs for crates 151 | # with multiple versions 152 | # * lowest-version - The path to the lowest versioned duplicate is highlighted 153 | # * simplest-path - The path to the version with the fewest edges is highlighted 154 | # * all - Both lowest-version and simplest-path are used 155 | highlight = "all" 156 | # The default lint level for `default` features for crates that are members of 157 | # the workspace that is being checked. This can be overridden by allowing/denying 158 | # `default` on a crate-by-crate basis if desired. 159 | workspace-default-features = "allow" 160 | # The default lint level for `default` features for external crates that are not 161 | # members of the workspace. This can be overridden by allowing/denying `default` 162 | # on a crate-by-crate basis if desired. 163 | external-default-features = "allow" 164 | # List of crates that are allowed. Use with care! 165 | allow = [ 166 | #"ansi_term@0.11.0", 167 | #{ crate = "ansi_term@0.11.0", reason = "you can specify a reason it is allowed" }, 168 | ] 169 | # List of crates to deny 170 | deny = [ 171 | #"ansi_term@0.11.0", 172 | #{ crate = "ansi_term@0.11.0", reason = "you can specify a reason it is banned" }, 173 | # Wrapper crates can optionally be specified to allow the crate when it 174 | # is a direct dependency of the otherwise banned crate 175 | #{ crate = "ansi_term@0.11.0", wrappers = ["this-crate-directly-depends-on-ansi_term"] }, 176 | ] 177 | 178 | # List of features to allow/deny 179 | # Each entry the name of a crate and a version range. If version is 180 | # not specified, all versions will be matched. 181 | #[[bans.features]] 182 | #crate = "reqwest" 183 | # Features to not allow 184 | #deny = ["json"] 185 | # Features to allow 186 | #allow = [ 187 | # "rustls", 188 | # "__rustls", 189 | # "__tls", 190 | # "hyper-rustls", 191 | # "rustls", 192 | # "rustls-pemfile", 193 | # "rustls-tls-webpki-roots", 194 | # "tokio-rustls", 195 | # "webpki-roots", 196 | #] 197 | # If true, the allowed features must exactly match the enabled feature set. If 198 | # this is set there is no point setting `deny` 199 | #exact = true 200 | 201 | # Certain crates/versions that will be skipped when doing duplicate detection. 202 | skip = [ 203 | #"ansi_term@0.11.0", 204 | #{ crate = "ansi_term@0.11.0", reason = "you can specify a reason why it can't be updated/removed" }, 205 | ] 206 | # Similarly to `skip` allows you to skip certain crates during duplicate 207 | # detection. Unlike skip, it also includes the entire tree of transitive 208 | # dependencies starting at the specified crate, up to a certain depth, which is 209 | # by default infinite. 210 | skip-tree = [ 211 | #"ansi_term@0.11.0", # will be skipped along with _all_ of its direct and transitive dependencies 212 | #{ crate = "ansi_term@0.11.0", depth = 20 }, 213 | ] 214 | 215 | # This section is considered when running `cargo deny check sources`. 
216 | # More documentation about the 'sources' section can be found here:
217 | # https://embarkstudios.github.io/cargo-deny/checks/sources/cfg.html
218 | [sources]
219 | # Lint level for what to happen when a crate from a crate registry that is not
220 | # in the allow list is encountered
221 | unknown-registry = "warn"
222 | # Lint level for what to happen when a crate from a git repository that is not
223 | # in the allow list is encountered
224 | unknown-git = "warn"
225 | # List of URLs for allowed crate registries. Defaults to the crates.io index
226 | # if not specified. If it is specified but empty, no registries are allowed.
227 | allow-registry = ["https://github.com/rust-lang/crates.io-index"]
228 | # List of URLs for allowed Git repositories
229 | allow-git = []
230 | 
231 | [sources.allow-org]
232 | # 1 or more github.com organizations to allow git sources for
233 | github = [""]
234 | # 1 or more gitlab.com organizations to allow git sources for
235 | gitlab = [""]
236 | # 1 or more bitbucket.org organizations to allow git sources for
237 | bitbucket = [""]
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | # This file was autogenerated by dist: https://axodotdev.github.io/cargo-dist
2 | #
3 | # Copyright 2022-2024, axodotdev
4 | # SPDX-License-Identifier: MIT or Apache-2.0
5 | #
6 | # CI that:
7 | #
8 | # * checks for a Git Tag that looks like a release
9 | # * builds artifacts with dist (archives, installers, hashes)
10 | # * uploads those artifacts to a temporary workflow zip
11 | # * on success, uploads the artifacts to a GitHub Release
12 | #
13 | # Note that the GitHub Release will be created with a generated
14 | # title/body based on your changelogs.
15 | 
16 | name: Release
17 | permissions:
18 |   "contents": "write"
19 | 
20 | # This task will run whenever you push a git tag that looks like a version
21 | # like "1.0.0", "v0.1.0-prerelease.1", "my-app/0.1.0", "releases/v1.0.0", etc.
22 | # Various formats will be parsed into a VERSION and an optional PACKAGE_NAME, where
23 | # PACKAGE_NAME must be the name of a Cargo package in your workspace, and VERSION
24 | # must be a Cargo-style SemVer Version (must have at least major.minor.patch).
25 | #
26 | # If PACKAGE_NAME is specified, then the announcement will be for that
27 | # package (erroring out if it doesn't have the given version or isn't dist-able).
28 | #
29 | # If PACKAGE_NAME isn't specified, then the announcement will be for all
30 | # (dist-able) packages in the workspace with that version (this mode is
31 | # intended for workspaces with only one dist-able package, or with all dist-able
32 | # packages versioned/released in lockstep).
33 | #
34 | # If you push multiple tags at once, separate instances of this workflow will
35 | # spin up, creating an independent announcement for each one. However, GitHub
36 | # will hard limit this to 3 tags per commit, as it will assume more tags is a
37 | # mistake.
38 | #
39 | # If there's a prerelease-style suffix to the version, then the release(s)
40 | # will be marked as a prerelease.
41 | on:
42 |   pull_request:
43 |   push:
44 |     tags:
45 |       - '**[0-9]+.[0-9]+.[0-9]+*'
46 | 
47 | jobs:
48 |   # Run 'dist plan' (or host) to determine what tasks we need to do
49 |   plan:
50 |     runs-on: "ubuntu-22.04"
51 |     outputs:
52 |       val: ${{ steps.plan.outputs.manifest }}
53 |       tag: ${{ !github.event.pull_request && github.ref_name || '' }}
54 |       tag-flag: ${{ !github.event.pull_request && format('--tag={0}', github.ref_name) || '' }}
55 |       publishing: ${{ !github.event.pull_request }}
56 |     env:
57 |       GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
58 |     steps:
59 |       - uses: actions/checkout@v4
60 |         with:
61 |           persist-credentials: false
62 |           submodules: recursive
63 |       - name: Install dist
64 |         # we specify bash to get pipefail; it guards against the `curl` command
65 |         # failing. otherwise `sh` won't catch that `curl` returned non-0
66 |         shell: bash
67 |         run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.30.2/cargo-dist-installer.sh | sh"
68 |       - name: Cache dist
69 |         uses: actions/upload-artifact@v4
70 |         with:
71 |           name: cargo-dist-cache
72 |           path: ~/.cargo/bin/dist
73 |       # sure would be cool if github gave us proper conditionals...
74 |       # so here's a doubly-nested ternary-via-truthiness to try to provide the best possible
75 |       # functionality based on whether this is a pull_request, and whether it's from a fork.
76 |       # (PRs run on the *source* but secrets are usually on the *target* -- that's *good*
77 |       # but also really annoying to build CI around when it needs secrets to work right.)
78 |       - id: plan
79 |         run: |
80 |           dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json
81 |           echo "dist ran successfully"
82 |           cat plan-dist-manifest.json
83 |           echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT"
84 |       - name: "Upload dist-manifest.json"
85 |         uses: actions/upload-artifact@v4
86 |         with:
87 |           name: artifacts-plan-dist-manifest
88 |           path: plan-dist-manifest.json
89 | 
90 |   # Build and package all the platform-specific things
91 |   build-local-artifacts:
92 |     name: build-local-artifacts (${{ join(matrix.targets, ', ') }})
93 |     # Let the initial task tell us to not run (currently very blunt)
94 |     needs:
95 |       - plan
96 |     if: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix.include != null && (needs.plan.outputs.publishing == 'true' || fromJson(needs.plan.outputs.val).ci.github.pr_run_mode == 'upload') }}
97 |     strategy:
98 |       fail-fast: false
99 |       # Target platforms/runners are computed by dist in create-release.
100 |       # Each member of the matrix has the following arguments:
101 |       #
102 |       # - runner: the github runner
103 |       # - dist-args: cli flags to pass to dist
104 |       # - install-dist: expression to run to install dist on the runner
105 |       #
106 |       # Typically there will be:
107 |       # - 1 "global" task that builds universal installers
108 |       # - N "local" tasks that build each platform's binaries and platform-specific installers
109 |       matrix: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix }}
110 |     runs-on: ${{ matrix.runner }}
111 |     container: ${{ matrix.container && matrix.container.image || null }}
112 |     env:
113 |       GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
114 |       BUILD_MANIFEST_NAME: target/distrib/${{ join(matrix.targets, '-') }}-dist-manifest.json
115 |     steps:
116 |       - name: enable windows longpaths
117 |         run: |
118 |           git config --global core.longpaths true
119 |       - uses: actions/checkout@v4
120 |         with:
121 |           persist-credentials: false
122 |           submodules: recursive
123 |       - name: Install Rust non-interactively if not already installed
124 |         if: ${{ matrix.container }}
125 |         run: |
126 |           if ! command -v cargo > /dev/null 2>&1; then
127 |             curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
128 |             echo "$HOME/.cargo/bin" >> $GITHUB_PATH
129 |           fi
130 |       - name: Install dist
131 |         run: ${{ matrix.install_dist.run }}
132 |       # Get the dist-manifest
133 |       - name: Fetch local artifacts
134 |         uses: actions/download-artifact@v4
135 |         with:
136 |           pattern: artifacts-*
137 |           path: target/distrib/
138 |           merge-multiple: true
139 |       - name: Install dependencies
140 |         run: |
141 |           ${{ matrix.packages_install }}
142 |       - name: Build artifacts
143 |         run: |
144 |           # Actually do builds and make zips and whatnot
145 |           dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json
146 |           echo "dist ran successfully"
147 |       - id: cargo-dist
148 |         name: Post-build
149 |         # We force bash here just because github makes it really hard to get values up
150 |         # to "real" actions without writing to env-vars, and writing to env-vars has
151 |         # inconsistent syntax between shell and powershell.
152 |         shell: bash
153 |         run: |
154 |           # Parse out what we just built and upload it to scratch storage
155 |           echo "paths<<EOF" >> "$GITHUB_OUTPUT"
156 |           dist print-upload-files-from-manifest --manifest dist-manifest.json >> "$GITHUB_OUTPUT"
157 |           echo "EOF" >> "$GITHUB_OUTPUT"
158 | 
159 |           cp dist-manifest.json "$BUILD_MANIFEST_NAME"
160 |       - name: "Upload artifacts"
161 |         uses: actions/upload-artifact@v4
162 |         with:
163 |           name: artifacts-build-local-${{ join(matrix.targets, '_') }}
164 |           path: |
165 |             ${{ steps.cargo-dist.outputs.paths }}
166 |             ${{ env.BUILD_MANIFEST_NAME }}
167 | 
168 |   # Build and package all the platform-agnostic(ish) things
169 |   build-global-artifacts:
170 |     needs:
171 |       - plan
172 |       - build-local-artifacts
173 |     runs-on: "ubuntu-22.04"
174 |     env:
175 |       GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
176 |       BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json
177 |     steps:
178 |       - uses: actions/checkout@v4
179 |         with:
180 |           persist-credentials: false
181 |           submodules: recursive
182 |       - name: Install cached dist
183 |         uses: actions/download-artifact@v4
184 |         with:
185 |           name: cargo-dist-cache
186 |           path: ~/.cargo/bin/
187 |       - run: chmod +x ~/.cargo/bin/dist
188 |       # Get all the local artifacts for the global tasks to use (for e.g. checksums)
189 |       - name: Fetch local artifacts
190 |         uses: actions/download-artifact@v4
191 |         with:
192 |           pattern: artifacts-*
193 |           path: target/distrib/
194 |           merge-multiple: true
195 |       - id: cargo-dist
196 |         shell: bash
197 |         run: |
198 |           dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json
199 |           echo "dist ran successfully"
200 | 
201 |           # Parse out what we just built and upload it to scratch storage
202 |           echo "paths<<EOF" >> "$GITHUB_OUTPUT"
203 |           jq --raw-output ".upload_files[]" dist-manifest.json >> "$GITHUB_OUTPUT"
204 |           echo "EOF" >> "$GITHUB_OUTPUT"
205 | 
206 |           cp dist-manifest.json "$BUILD_MANIFEST_NAME"
207 |       - name: "Upload artifacts"
208 |         uses: actions/upload-artifact@v4
209 |         with:
210 |           name: artifacts-build-global
211 |           path: |
212 |             ${{ steps.cargo-dist.outputs.paths }}
213 |             ${{ env.BUILD_MANIFEST_NAME }}
214 |   # Determines if we should publish/announce
215 |   host:
216 |     needs:
217 |       - plan
218 |       - build-local-artifacts
219 |       - build-global-artifacts
220 |     # Only run if we're "publishing", and only if plan, local and global didn't fail (skipped is fine)
221 |     if: ${{ always() && needs.plan.result == 'success' && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.build-local-artifacts.result == 'skipped' || needs.build-local-artifacts.result == 'success') }}
222 |     env:
223 |       GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
224 |     runs-on: "ubuntu-22.04"
225 |     outputs:
226 |       val: ${{ steps.host.outputs.manifest }}
227 |     steps:
228 |       - uses: actions/checkout@v4
229 |         with:
230 |           persist-credentials: false
231 |           submodules: recursive
232 |       - name: Install cached dist
233 |         uses: actions/download-artifact@v4
234 |         with:
235 |           name: cargo-dist-cache
236 |           path: ~/.cargo/bin/
237 |       - run: chmod +x ~/.cargo/bin/dist
238 |       # Fetch artifacts from scratch-storage
239 |       - name: Fetch artifacts
240 |         uses: actions/download-artifact@v4
241 |         with:
242 |           pattern: artifacts-*
243 |           path: target/distrib/
244 |           merge-multiple: true
245 |       - id: host
246 |         shell: bash
247 |         run: |
248 |           dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json
249 |           echo "artifacts uploaded and released successfully"
250 |           cat dist-manifest.json
251 |           echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT"
252 |       - name: "Upload dist-manifest.json"
253 |         uses: actions/upload-artifact@v4
254 |         with:
255 |           # Overwrite the previous copy
256 |           name: artifacts-dist-manifest
257 |           path: dist-manifest.json
258 |       # Create a GitHub Release while uploading all files to it
259 |       - name: "Download GitHub Artifacts"
260 |         uses: actions/download-artifact@v4
261 |         with:
262 |           pattern: artifacts-*
263 |           path: artifacts
264 |           merge-multiple: true
265 |       - name: Cleanup
266 |         run: |
267 |           # Remove the granular manifests
268 |           rm -f artifacts/*-dist-manifest.json
269 |       - name: Create GitHub Release
270 |         env:
271 |           PRERELEASE_FLAG: "${{ fromJson(steps.host.outputs.manifest).announcement_is_prerelease && '--prerelease' || '' }}"
272 |           ANNOUNCEMENT_TITLE: "${{ fromJson(steps.host.outputs.manifest).announcement_title }}"
273 |           ANNOUNCEMENT_BODY: "${{ fromJson(steps.host.outputs.manifest).announcement_github_body }}"
274 |           RELEASE_COMMIT: "${{ github.sha }}"
275 |         run: |
276 |           # Write and read notes from a file to avoid quoting breaking things
277 |           echo "$ANNOUNCEMENT_BODY" > $RUNNER_TEMP/notes.txt
278 | 
279 |           gh release create "${{ needs.plan.outputs.tag }}" --target "$RELEASE_COMMIT" $PRERELEASE_FLAG --title "$ANNOUNCEMENT_TITLE" --notes-file "$RUNNER_TEMP/notes.txt" artifacts/*
280 | 
281 |   announce:
282 |     needs:
283 |       - plan
284 |       - host
285 |     # use "always() && ..." to allow us to wait for all publish jobs while
286 |     # still allowing individual publish jobs to skip themselves (for prereleases).
287 |     # "host" however must run to completion, no skipping allowed!
288 |     if: ${{ always() && needs.host.result == 'success' }}
289 |     runs-on: "ubuntu-22.04"
290 |     env:
291 |       GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
292 |     steps:
293 |       - uses: actions/checkout@v4
294 |         with:
295 |           persist-credentials: false
296 |           submodules: recursive
--------------------------------------------------------------------------------
/src/list_manipulations.rs:
--------------------------------------------------------------------------------
1 | use crate::count_characters;
2 | use crate::edit_distance::find_edit_distance;
3 | use crate::schlinkert_pruning::get_sardinas_patterson_final_intersection;
4 | use memchr::memchr;
5 | use unicode_normalization::UnicodeNormalization;
6 | 
7 | /// Normalize the Unicode of a string.
8 | /// See the [unicode-normalization](https://docs.rs/unicode-normalization) crate for details.
9 | pub fn normalize_unicode(word: &str, nf: &str) -> Result<String, String> {
10 |     match nf.to_lowercase().as_str() {
11 |         "nfc" => Ok(word.nfc().collect()),
12 |         "nfd" => Ok(word.nfd().collect()),
13 |         "nfkc" => Ok(word.nfkc().collect()),
14 |         "nfkd" => Ok(word.nfkd().collect()),
15 |         _ => Err("Unknown Unicode Normalization Form received in arguments.\nPlease use one of the following normalization forms: nfc, nfd, nfkc, or nfkd.".to_string()),
16 |     }
17 | }
18 | 
19 | // use core::cmp::Ordering;
20 | use icu::collator::{options::*, *};
21 | // use icu::locale::locale;
22 | use icu::locale::Locale;
23 | /// Sort a Vector of words alphabetically, taking into account the locale of the words
24 | /// `.sorted()` words -> ["Zambia", "abbey", "eager", "enlever", "ezra", "zoo", "énigme"]
25 | /// sort_carefully words -> ["abbey", "eager", "énigme", "enlever", "ezra", "Zambia", "zoo"]
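/// A minimal doctest sketch of that ordering (the locale string and words
/// here are illustrative):
/// ```
/// use tidy::list_manipulations::sort_carefully;
/// use icu::locale::Locale;
/// let loc: Locale = "en-US".parse().unwrap();
/// let words = vec!["Zambia".to_string(), "énigme".to_string(), "abbey".to_string()];
/// assert_eq!(sort_carefully(words, loc), vec!["abbey", "énigme", "Zambia"]);
/// ```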
26 | pub fn sort_carefully(list: Vec<String>, loc: Locale) -> Vec<String> {
27 |     // Examples: https://github.com/unicode-org/icu4x/tree/main/components/collator#examples
28 |     // Reference: https://docs.rs/icu/latest/icu/collator/index.html
29 |     // https://docs.rs/icu/latest/icu/locale/struct.Locale.html
30 |     let mut options = CollatorOptions::default();
31 |     options.strength = Some(Strength::Secondary); // Note this is not the locally defined passphrase Strength!
32 |     let collator = Collator::try_new(loc.into(), options).unwrap();
33 | 
34 |     let mut newly_sorted_list = list;
35 |     newly_sorted_list.sort_by(|a, b| collator.compare(a, b));
36 |     newly_sorted_list
37 | }
38 | 
39 | /// Sort by word length, with longest words first. For words of equal length, sorts
40 | /// words alphabetically, respecting inputted locale.
41 | pub fn sort_by_length(list: Vec<String>, loc: Locale) -> Vec<String> {
42 |     // Set up the collator again
43 |     let mut options = CollatorOptions::default();
44 |     options.strength = Some(Strength::Secondary);
45 |     // let collator = Collator::try_new(locale, options).unwrap();
46 |     let collator = Collator::try_new(loc.into(), options).unwrap();
47 | 
48 |     let mut list = list;
49 |     // Order by count_characters(w) descending, then within that,
50 |     // alphabetically
51 |     list.sort_by(|word1, word2| {
52 |         count_characters(word2)
53 |             .cmp(&count_characters(word1))
54 |             .then_with(|| collator.compare(word1, word2))
55 |     });
56 |     list
57 | }
58 | 
59 | /// Given a String (a word), delete all integers from the word.
60 | pub fn delete_integers(mut word: String) -> String {
61 |     word.retain(|c| !c.is_numeric());
62 |     word
63 | }
64 | 
65 | /// Given a String (a word), delete all characters that are not
66 | /// alphanumeric.
67 | /// ```
68 | /// use tidy::list_manipulations::delete_nonalphanumeric;
69 | /// assert_eq!(delete_nonalphanumeric("Hello!".to_string()), "Hello");
70 | /// assert_eq!(delete_nonalphanumeric("world824...".to_string()), "world824");
71 | /// ```
72 | pub fn delete_nonalphanumeric(mut word: String) -> String {
73 |     word.retain(|c| c.is_alphanumeric());
74 |     word
75 | }
76 | 
77 | /// Delete all characters through and including the first appearance
78 | /// of character `ch` in inputted `&str` `s`. Program uses this to
79 | /// remove characters through the first tab or first space, a common task
80 | /// when dealing with diceware passphrase word lists that have dice roll
81 | /// numbers before each word. The
82 | /// [EFF long list](https://www.eff.org/files/2016/07/18/eff_large_wordlist.txt)
83 | /// is one such example.
84 | 
85 | /// Uses [memchr library](https://docs.rs/memchr/latest/memchr/)
86 | /// to find this character a bit quicker than the standard function.
87 | 
88 | /// I outlined other approaches to this function in
89 | /// [a separate repo](https://github.com/sts10/splitter/blob/main/src/lib.rs).
90 | pub fn delete_before_first_char(s: &str, ch: char) -> &str {
91 |     match memchr(ch as u8, s.as_bytes()) {
92 |         None => s, // not found => return the whole string
93 |         Some(pos) => &s[pos + 1..],
94 |     }
95 | }
96 | 
97 | /// Delete all characters after and including the first appearance
98 | /// of character `ch` in inputted `&str` `s`.
99 | 
100 | /// Uses [memchr library](https://docs.rs/memchr/latest/memchr/)
101 | /// to find this character a bit quicker than the standard function.
102 | 
103 | /// I outlined other approaches to this function in
104 | /// [a separate repo](https://github.com/sts10/splitter/blob/main/src/lib.rs).
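/// A small doctest sketch (the example strings are illustrative):
/// ```
/// use tidy::list_manipulations::delete_after_first_char;
/// assert_eq!(delete_after_first_char("sun,6112", ','), "sun");
/// assert_eq!(delete_after_first_char("no-comma-here", ','), "no-comma-here");
/// ```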
105 | pub fn delete_after_first_char(s: &str, ch: char) -> &str {
106 |     match memchr(ch as u8, s.as_bytes()) {
107 |         None => s, // delimiting character not found in string s, so return the whole string
108 |         Some(pos) => &s[0..pos],
109 |     }
110 | }
111 | 
112 | use std::collections::HashMap;
113 | /// This function removes words from the given word list
114 | /// such that the resulting, outputted list has a guaranteed
115 | /// maximum prefix length.
116 | ///
117 | /// As an example, if `maximum_shared_prefix_length == 4`, this
118 | /// means that on the resulting list, knowing the first 4 characters
119 | /// of any word on the generated list is sufficient to know which
120 | /// word it is. In this case, we'd know that if a word starts with
121 | /// "radi", we know it must be the word "radius" (if "radical" had been
122 | /// on the list, this function would have removed it).
123 | ///
124 | /// This is useful if you intend the list to be used by software that
125 | /// uses auto-complete. In the case described above, a user will only have to type the
126 | /// first 4 characters of any word before a program could successfully
127 | /// auto-complete the entire word.
128 | pub fn guarantee_maximum_prefix_length(
129 |     list: &[String],
130 |     maximum_shared_prefix_length: usize,
131 | ) -> Vec<String> {
132 |     let mut prefix_hashmap: HashMap<String, String> = HashMap::new();
133 |     for this_word in list {
134 |         // If this word is too short just skip it.
135 |         if count_characters(this_word) < maximum_shared_prefix_length {
136 |             continue;
137 |         }
138 |         prefix_hashmap
139 |             .entry(get_prefix(this_word, maximum_shared_prefix_length))
140 |             .and_modify(|existing_word| {
141 |                 // Prefer shorter words, as a stand-in for simplicity (though that
142 |                 // is debatable...)
143 |                 if count_characters(this_word) < count_characters(existing_word) {
144 |                     *existing_word = this_word.to_string()
145 |                 }
146 |             })
147 |             .or_insert_with(|| this_word.to_string());
148 |     }
149 |     let new_vec: Vec<(&String, &String)> = prefix_hashmap.iter().collect();
150 |     let mut new_word_list = vec![];
151 |     for t in new_vec {
152 |         new_word_list.push(t.1.to_string());
153 |     }
154 |     new_word_list
155 | }
156 | 
157 | /// Executes the Schlinkert prune. Attempts to make a list uniquely decodable
158 | /// by removing the fewest number of code words possible. Adapted from the
159 | /// Sardinas-Patterson algorithm.
160 | /// Runs the word list both as given and with each word reversed, preferring
161 | /// whichever preserves more words from the given list.
162 | pub fn schlinkert_prune(list: &[String]) -> Vec<String> {
163 |     // Clumsily clone the list into a new variable.
164 |     let mut new_list = list.to_owned();
165 |     // First, simply find the "offenders" with the list as given.
166 |     let offenders_to_remove_forwards = get_sardinas_patterson_final_intersection(list);
167 |     // Now, reverse all words before running the Schlinkert prune.
168 |     // This will give a different list of offending words -- and potentially FEWER
169 |     // than running the prune forwards. (We call the reverse_all_words function
170 |     // twice because we have to un-reverse all the offending words at the end.)
171 |     let offenders_to_remove_backwards = reverse_all_words(
172 |         &get_sardinas_patterson_final_intersection(&reverse_all_words(list)),
173 |     );
174 |     // If running the prune on the reversed words yielded fewer offenders,
175 |     // we'll remove those offending words, since our goal is to remove
176 |     // as few words as possible.
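    // (On a tie, the forward-run offenders are preferred -- hence the `<=` below.)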
177 |     if offenders_to_remove_forwards.len() <= offenders_to_remove_backwards.len() {
178 |         new_list.retain(|x| !offenders_to_remove_forwards.contains(x));
179 |     } else {
180 |         new_list.retain(|x| !offenders_to_remove_backwards.contains(x));
181 |     }
182 |     new_list
183 | }
184 | 
185 | /// Reverse all words on a given list. For example,
186 | /// `["hotdog", "hamburger", "alligator"]` becomes
187 | /// `["godtoh", "regrubmah", "rotagilla"]`
188 | /// Uses graphemes to ensure it handles accented characters correctly.
189 | pub fn reverse_all_words(list: &[String]) -> Vec<String> {
190 |     let mut reversed_list = vec![];
191 |     for word in list {
192 |         reversed_list.push(word.graphemes(true).rev().collect::<String>());
193 |     }
194 |     reversed_list
195 | }
196 | 
197 | use unicode_segmentation::UnicodeSegmentation;
198 | /// Given a word and a `usize` of `length`, this function returns
199 | /// the first `length` characters of that word. This length is
200 | /// measured in grapheme clusters to better handle accented
201 | /// characters and emoji.
202 | /// ```
203 | /// use tidy::list_manipulations::get_prefix;
204 | /// assert_eq!(get_prefix("hello world", 4), "hell");
205 | /// assert_eq!(get_prefix("sécréter", 5), "sécré");
206 | /// assert_eq!(get_prefix("😀😃😄😁😆", 2), "😀😃");
207 | /// ```
208 | pub fn get_prefix(word: &str, length: usize) -> String {
209 |     word.graphemes(true).take(length).collect::<String>()
210 | }
211 | 
212 | /// Helper function to determine if a given char as `u16` is a
213 | /// Latin letter (A through Z or a through z, no diacritics).
214 | /// ```
215 | /// use tidy::list_manipulations::is_latin_alphabetic;
216 | /// assert_eq!(is_latin_alphabetic('h' as u16), true);
217 | /// assert_eq!(is_latin_alphabetic('A' as u16), true);
218 | /// assert_eq!(is_latin_alphabetic('1' as u16), false);
219 | /// assert_eq!(is_latin_alphabetic(',' as u16), false);
220 | /// assert_eq!(is_latin_alphabetic('é' as u16), false);
221 | /// assert_eq!(is_latin_alphabetic('ő' as u16), false);
222 | /// ```
223 | pub fn is_latin_alphabetic(chr: u16) -> bool {
224 |     (chr >= 65 && chr <= 90) || (chr >= 97 && chr <= 122)
225 | }
226 | 
227 | /// Replaces curly or smart quotes with straight quotes.
228 | pub fn straighten_quotes(input: &str) -> String {
229 |     let mut result = String::new();
230 |     for c in input.chars() {
231 |         match c {
232 |             '\u{201C}' => result.push('\"'), // left double quotation mark
233 |             '\u{201D}' => result.push('\"'), // right double quotation mark
234 |             '\u{2018}' => result.push('\''), // left single quotation mark
235 |             '\u{2019}' => result.push('\''), // right single quotation mark
236 |             _ => result.push(c),
237 |         }
238 |     }
239 |     result
240 | }
241 | 
242 | use itertools::Itertools;
243 | /// De-duplicates a Vector of `String`s while maintaining list order.
244 | pub fn dedup_without_sorting(list: &mut [String]) -> Vec<String> {
245 |     let dedup: Vec<String> = list.iter().unique().map(|s| s.to_string()).collect();
246 |     dedup
247 | }
248 | 
249 | /// Remove prefix words from the given Vector of `String`s.
250 | ///
251 | /// A brief example: If both "news" and "newspaper" are on the inputted list,
252 | /// we may, for security reasons, want to remove the prefix word,
253 | /// which is "news" in this case.
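/// A doctest sketch of the "news"/"newspaper" example above:
/// ```
/// use tidy::list_manipulations::remove_prefix_words;
/// let list = vec!["news".to_string(), "newspaper".to_string(), "sun".to_string()];
/// assert_eq!(remove_prefix_words(list), vec!["newspaper", "sun"]);
/// ```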
254 | pub fn remove_prefix_words(list: Vec<String>) -> Vec<String> {
255 |     let mut list_without_prefix_words = list.to_vec();
256 |     list_without_prefix_words.retain(|potential_prefix_word| {
257 |         for word in &list {
258 |             if word.starts_with(potential_prefix_word) && word != potential_prefix_word {
259 |                 // This is a prefix word, so we do NOT want to retain it. return false to the
260 |                 // retain
261 |                 return false;
262 |             } else {
263 |                 // This particular word is not a prefix word of this potential_prefix_word.
264 |                 // keep looping
265 |                 continue;
266 |             };
267 |         }
268 |         // If we've made it here, we can be sure that potential_prefix_word is NOT a
269 |         // prefix word. So we want to retain it for the list_without_prefix_words.
270 |         // To do this, we return true to the retain.
271 |         true
272 |     });
273 |     list_without_prefix_words
274 | }
275 | 
276 | /// Remove suffix words from the given Vector of `String`s.
277 | ///
278 | /// A brief example: If both "paper" and "newspaper" are on the inputted list,
279 | /// we may, for security reasons, want to remove the suffix word,
280 | /// which is "paper" in this case.
281 | pub fn remove_suffix_words(list: Vec<String>) -> Vec<String> {
282 |     let mut list_without_suffix_words = list.to_vec();
283 |     list_without_suffix_words.retain(|potential_suffix_word| {
284 |         for word in &list {
285 |             if word.ends_with(potential_suffix_word) && word != potential_suffix_word {
286 |                 // This is a suffix word, so we do NOT want to retain it. return false to the
287 |                 // retain
288 |                 return false;
289 |             } else {
290 |                 // This particular word is not a suffix word of this potential_suffix_word.
291 |                 // keep looping
292 |                 continue;
293 |             };
294 |         }
295 |         // If we've made it here, we can be sure that potential_suffix_word is NOT a
296 |         // suffix word. So we want to retain it for the list_without_suffix_words.
297 |         // To do this, we return true to the retain.
298 |         true
299 |     });
300 |     list_without_suffix_words
301 | }
302 | 
303 | /// Only retain words that are the given `minimum_edit_distance` away from all
304 | /// other words on the list.
305 | ///
306 | /// Calculates edit distance using a function in the edit_distance module.
307 | pub fn enfore_minimum_edit_distance(
308 |     list: Vec<String>,
309 |     minimum_edit_distance: usize,
310 | ) -> Vec<String> {
311 |     let minimum_edit_distance: u32 = minimum_edit_distance.try_into().unwrap();
312 |     let mut list_to_read = list.to_vec();
313 |     // Sort short words first to prefer them
314 |     list_to_read.sort_by_key(|a| count_characters(a));
315 | 
316 |     let mut new_list = list.to_vec();
317 |     new_list.retain(|potential_too_close_word| {
318 |         for word in &list_to_read {
319 |             // Skip if we're looking at the same word
320 |             if word == potential_too_close_word {
321 |                 continue;
322 |             }
323 |             if find_edit_distance(word, potential_too_close_word) < minimum_edit_distance {
324 |                 // This potential_too_close_word is too close to another word on the list,
325 |                 // so we do NOT want to retain it.
326 |                 // return false to the retain
327 |                 return false;
328 |             } else {
329 |                 // This particular word is not too close to this potential_too_close_word.
330 |                 // keep looping
331 |                 continue;
332 |             };
333 |         }
334 |         // If we've made it here, we can be sure that potential_too_close_word is NOT too
335 |         // close to another word. So we want to retain it for the new_list.
336 |         // To do this, we return true to the retain.
337 |         true
338 |     });
339 |     new_list
340 | }
341 | 
342 | /// Takes the inputted word list and a Vector of tuples of Strings,
343 | /// each representing a pair of homophones, e.g. `("there", "their")`.
344 | /// The function outputs a new list in which, if both homophones
345 | /// are detected, the second homophone is removed.
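/// A doctest sketch, using the ("sun", "son") pair mentioned in the CLI help:
/// ```
/// use tidy::list_manipulations::remove_homophones;
/// let list = vec!["sun".to_string(), "son".to_string(), "moon".to_string()];
/// let homophones = vec![("sun".to_string(), "son".to_string())];
/// assert_eq!(remove_homophones(list, homophones), vec!["sun", "moon"]);
/// ```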
346 | pub fn remove_homophones(list: Vec<String>, homophones: Vec<(String, String)>) -> Vec<String> {
347 |     let mut words_to_remove = vec![];
348 |     for pair_of_homophones in homophones {
349 |         if list.contains(&pair_of_homophones.0)
350 |             && list.contains(&pair_of_homophones.1)
351 |             && !(words_to_remove.contains(&pair_of_homophones.0)
352 |                 || words_to_remove.contains(&pair_of_homophones.1))
353 |         {
354 |             words_to_remove.push(pair_of_homophones.1);
355 |         }
356 |     }
357 |     let mut new_list = list.to_vec();
358 |     new_list.retain(|w| !words_to_remove.contains(w));
359 |     new_list
360 | }
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | // use core::cmp::Ordering;
2 | use icu::locale::Locale;
3 | // use icu::locale::locale;
4 | use rand::prelude::SliceRandom;
5 | use rand::rng;
6 | pub mod cards;
7 | pub mod dice;
8 | pub mod display_information;
9 | pub mod edit_distance;
10 | pub mod file_readers;
11 | pub mod file_writer;
12 | pub mod list_manipulations;
13 | pub mod parsers;
14 | pub mod schlinkert_pruning;
15 | use crate::list_manipulations::*;
16 | 
17 | #[derive(Default, Debug, Clone)]
18 | pub struct TidyRequest {
19 |     pub list: Vec<String>,
20 |     pub take_first: Option<usize>,
21 |     pub take_rand: Option<usize>,
22 |     pub sort_alphabetically: bool,
23 |     pub sort_by_length: bool,
24 |     pub ignore_after_delimiter: Option<char>,
25 |     pub ignore_before_delimiter: Option<char>,
26 |     pub normalization_form: Option<String>,
27 |     pub locale: String, // defaults to en-US
28 |     pub to_lowercase: bool,
29 |     pub should_straighten_quotes: bool,
30 |     pub should_remove_prefix_words: bool,
31 |     pub should_remove_suffix_words: bool,
32 |     pub should_schlinkert_prune: bool,
33 |     pub should_remove_nonalphanumeric: bool,
34 |     pub should_delete_nonalphanumeric: bool,
35 |     pub should_remove_nonalphabetic: bool,
36 |     pub should_remove_non_latin_alphabetic: bool,
37 |     pub should_remove_nonascii: bool,
38 |     pub should_remove_integers: bool,
39 |     pub should_delete_integers: bool,
40 |     pub should_delete_after_first_delimiter: Option<char>,
41 |     pub should_delete_before_first_delimiter: Option<char>,
42 |     pub reject_list: Option<Vec<String>>,
43 |     pub approved_list: Option<Vec<String>>,
44 |     pub homophones_list: Option<Vec<(String, String)>>,
45 |     pub minimum_length: Option<usize>,
46 |     pub maximum_length: Option<usize>,
47 |     pub maximum_shared_prefix_length: Option<usize>,
48 |     pub minimum_edit_distance: Option<usize>,
49 |     pub print_rand: Option<usize>,
50 |     pub print_first: Option<usize>,
51 | }
52 | 
53 | #[derive(PartialEq)]
54 | enum MetadataPosition {
55 |     Start,
56 |     End,
57 | }
58 | 
59 | /// Simple helper function that splits a `str` by a given substring `str`,
60 | /// then returns a Vector of `str`s.
61 | /// ```
62 | /// use tidy::split_and_vectorize;
63 | /// assert_eq!(split_and_vectorize("a:b:c",":"), vec!["a","b","c"]);
64 | /// ```
65 | /// I find this a handy general helper function.
66 | pub fn split_and_vectorize<'a>(string_to_split: &'a str, splitter: &str) -> Vec<&'a str> {
67 |     string_to_split.split(splitter).collect()
68 | }
69 | 
70 | /// This is the large, key function of the program. It takes
71 | /// a `TidyRequest` object -- which includes the word list --
72 | /// and performs whatever functions the user has requested to
73 | /// perform on the list.
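/// A minimal doctest sketch (the list values are illustrative; every other
/// option is left at its `Default` value):
/// ```
/// use tidy::{tidy_list, TidyRequest};
/// let req = TidyRequest {
///     list: vec!["  Apple ".to_string(), "apple".to_string(), "banana".to_string()],
///     to_lowercase: true,
///     sort_alphabetically: true,
///     locale: "en-US".to_string(),
///     ..TidyRequest::default()
/// };
/// assert_eq!(tidy_list(req), vec!["apple", "banana"]);
/// ```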
74 | pub fn tidy_list(req: TidyRequest) -> Vec<String> {
75 |     // First, we need to do the two truncations
76 |     let mut list_to_tidy = req.list.clone();
77 |     list_to_tidy = match req.take_first {
78 |         Some(amount_to_take) => {
79 |             list_to_tidy.truncate(amount_to_take);
80 |             list_to_tidy
81 |         }
82 |         None => list_to_tidy,
83 |     };
84 |     list_to_tidy = match req.take_rand {
85 |         Some(amount_to_take) => {
86 |             let mut rng = rng();
87 |             list_to_tidy.shuffle(&mut rng);
88 |             list_to_tidy.truncate(amount_to_take);
89 |             list_to_tidy
90 |         }
91 |         None => list_to_tidy,
92 |     };
93 |     let mut tidied_list = vec![];
94 |     // Now we go word-by-word
95 |     for word in &list_to_tidy {
96 |         // METADATA-IGNORING WORD REMOVALS
97 |         // If user chose to ignore metadata, split the line into the word and the metadata
98 |         // based on given delimiter. Note that metadata may come before or after the word.
99 |         // We'll then do removal operations on the "word", ignoring metadata.
100 |         // Later, we'll re-add the metadata to the word.
101 | 
102 |         // We need delimiter to have a broad scope so that we can use it
103 |         // when we re-add the metadata at the end. Default to comma, but can be changed
104 |         // in match statement here.
105 |         let (mut new_word, delimiter, metadata, metadata_position) =
106 |             match (req.ignore_after_delimiter, req.ignore_before_delimiter) {
107 |                 (Some(delimiter), None) => {
108 |                     // Parse delimiter. Currently this converts 's' to ' '
109 |                     // and 't' to '\t'.
110 |                     let delimiter = parse_delimiter(delimiter).unwrap();
111 |                     let split_vec = split_and_vectorize(word, &delimiter.to_string());
112 |                     if split_vec.len() == 1 {
113 |                         eprintln!("No metadata found for word: {:?}", word);
114 |                         (word.to_string(), Some(delimiter), None, None)
115 |                     } else {
116 |                         (
117 |                             split_vec[0].to_string(),
118 |                             Some(delimiter),
119 |                             Some(split_vec[1]),
120 |                             Some(MetadataPosition::End),
121 |                         )
122 |                     }
123 |                 }
124 |                 (None, Some(delimiter)) => {
125 |                     let delimiter = parse_delimiter(delimiter).unwrap();
126 |                     let split_vec = split_and_vectorize(word, &delimiter.to_string());
127 |                     if split_vec.len() == 1 {
128 |                         eprintln!("No metadata found for word: {:?}", word);
129 |                         (word.to_string(), Some(delimiter), None, None)
130 |                     } else {
131 |                         (
132 |                             split_vec[1].to_string(),
133 |                             Some(delimiter),
134 |                             Some(split_vec[0]),
135 |                             Some(MetadataPosition::Start),
136 |                         )
137 |                     }
138 |                 }
139 |                 (Some(ref _delimiter1), Some(ref _delimiter2)) => {
140 |                     // This situation should be caught and handled better
141 |                     // in src/main.rs, so this is really just in case.
142 |                     panic!("Can't ignore metadata on both sides currently")
143 |                 }
144 |                 (None, None) => (word.to_string(), None, None, None),
145 |             };
146 | 
147 |         // Trim the new word, then normalize its Unicode if the user gave a
148 |         // normalization form to use
149 |         new_word = match &req.normalization_form {
150 |             Some(nf) => match normalize_unicode(new_word.trim(), nf) {
151 |                 Ok(word) => word,
152 |                 Err(e) => panic!("{}", e),
153 |             },
154 |             None => {
155 |                 // still need to trim
156 |                 new_word.trim().to_string()
157 |             }
158 |         };
159 | 
160 |         // WORD MODIFICATIONS
161 |         // For logic reasons, it's crucial that Tidy perform these word
162 |         // modifications BEFORE it runs word removals.
163 |         // If the user has chosen to Ignore Metadata, we're guaranteed
164 |         // that all of these will be None, so we don't have to worry
165 |         // about metadata loss due to de-duplication caused by word modification.
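        // First come the delimiter-based deletions (the -D and -d options),
        // then integer and non-alphanumeric deletions, lowercasing, and
        // quote-straightening.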
166 |         new_word = match req.should_delete_before_first_delimiter {
167 |             Some(delimiter) => {
168 |                 delete_before_first_char(&new_word, parse_delimiter(delimiter).unwrap()).to_string()
169 |             }
170 |             None => new_word,
171 |         };
172 |         new_word = match req.should_delete_after_first_delimiter {
173 |             Some(delimiter) => {
174 |                 delete_after_first_char(&new_word, parse_delimiter(delimiter).unwrap()).to_string()
175 |             }
176 |             None => new_word,
177 |         };
178 |         if req.should_delete_integers && new_word.chars().any(|c| c.is_numeric()) {
179 |             new_word = delete_integers(new_word.to_string());
180 |         }
181 |         if req.should_delete_nonalphanumeric && new_word.chars().any(|c| c.is_alphanumeric()) {
182 |             new_word = delete_nonalphanumeric(new_word.to_string());
183 |         }
184 |         if req.to_lowercase {
185 |             new_word = new_word.to_ascii_lowercase();
186 |         }
187 |         if req.should_straighten_quotes {
188 |             new_word = straighten_quotes(&new_word).to_string();
189 |         }
190 | 
191 |         new_word = new_word.trim().to_string();
192 | 
193 |         // WORD REMOVALS
194 |         // Now that the words have been modified, we can move on to
195 |         // word removals.
196 |         // IF user has chosen to ignore any metadata, these should be the
197 |         // first edits that we do.
198 |         if req.should_remove_nonascii {
199 |             // https://doc.rust-lang.org/std/primitive.str.html#method.is_ascii
200 |             if !new_word.is_ascii() {
201 |                 // If we're here, that means we already know that we
202 |                 // do NOT want to add this word to our outputted list.
203 |                 // So we can just skip to the next word in our loop.
204 |                 continue;
205 |             }
206 |         }
207 |         // For these other checks, we have to iterate through every individual
208 |         // character (char) of each word
209 |         if req.should_remove_nonalphanumeric && new_word.chars().any(|c| !c.is_alphanumeric()) {
210 |             continue;
211 |         }
212 |         if req.should_remove_nonalphabetic && new_word.chars().any(|c| !c.is_alphabetic()) {
213 |             continue;
214 |         }
215 |         if req.should_remove_non_latin_alphabetic
216 |             && new_word.chars().any(|chr| !is_latin_alphabetic(chr as u16))
217 |         {
218 |             continue;
219 |         }
220 |         if req.should_remove_integers && new_word.chars().any(|c| c.is_numeric()) {
221 |             continue;
222 |         }
223 |         if let Some(ref reject_list) = req.reject_list {
224 |             if reject_list.contains(&new_word) {
225 |                 continue;
226 |             }
227 |         }
228 | 
229 |         if let Some(ref approved_list) = req.approved_list {
230 |             if !approved_list.contains(&new_word) {
231 |                 continue;
232 |             }
233 |         };
234 | 
235 |         if let Some(minimum_length) = req.minimum_length {
236 |             if count_characters(&new_word) < minimum_length {
237 |                 continue;
238 |             }
239 |         };
240 | 
241 |         if let Some(maximum_length) = req.maximum_length {
242 |             if count_characters(&new_word) > maximum_length {
243 |                 continue;
244 |             }
245 |         };
246 | 
247 |         // trim whitespace
248 |         new_word = new_word.trim().to_string();
249 | 
250 |         // If there was metadata, re-add it to the word now.
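        // Only re-attach metadata if the word survived the removals above.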
251 |         if !new_word.is_empty() {
252 |             if let Some(metadata) = metadata {
253 |                 if metadata_position == Some(MetadataPosition::End) {
254 |                     new_word = new_word + &delimiter.unwrap().to_string() + metadata;
255 |                 } else if metadata_position == Some(MetadataPosition::Start) {
256 |                     new_word = metadata.to_owned() + &delimiter.unwrap().to_string() + &new_word;
257 |                 }
258 |             };
259 |         }
260 | 
261 |         // trim whitespace again
262 |         new_word = new_word.trim().to_string();
263 |         // The trim calls could have made new_word empty,
264 |         // so we need to check again
265 |         if !new_word.is_empty() {
266 |             tidied_list.push(new_word);
267 |         }
268 |     }
269 |     // Now truncate the list, if requested.
270 |     // Some operations are just a bit too complex for
271 |     // me to figure out how to do on a per-word basis
272 |     // at this time. Maybe something to revisit in the
273 |     // future
274 |     tidied_list = match req.homophones_list {
275 |         Some(homophones_list) => remove_homophones(tidied_list, homophones_list),
276 |         None => tidied_list,
277 |     };
278 |     // I think this is a good order for these next few operations,
279 |     // but I'm not super confident
280 |     tidied_list = match req.maximum_shared_prefix_length {
281 |         Some(maximum_shared_prefix_length) => {
282 |             guarantee_maximum_prefix_length(&tidied_list, maximum_shared_prefix_length)
283 |         }
284 |         None => tidied_list,
285 |     };
286 |     tidied_list = match req.minimum_edit_distance {
287 |         Some(minimum_edit_distance) => {
288 |             enfore_minimum_edit_distance(tidied_list, minimum_edit_distance)
289 |         }
290 |         None => tidied_list,
291 |     };
292 |     tidied_list = if req.should_remove_suffix_words {
293 |         remove_suffix_words(dedup_without_sorting(&mut tidied_list))
294 |     } else {
295 |         tidied_list
296 |     };
297 |     tidied_list = if req.should_remove_prefix_words {
298 |         remove_prefix_words(dedup_without_sorting(&mut tidied_list))
299 |     } else {
300 |         tidied_list
301 |     };
302 |     tidied_list = if req.should_schlinkert_prune {
303 |         schlinkert_prune(&dedup_without_sorting(&mut tidied_list))
304 |     } else {
305 |         tidied_list
306 |     };
307 | 
308 |     // Remove duplicate words
309 |     tidied_list = dedup_without_sorting(&mut tidied_list);
310 | 
311 |     // User can choose to print a limited number of words from the nearly finished (but still
312 |     // unsorted) list.
313 |     // Can do so from the beginning of the nearly finished list...
314 |     tidied_list = match req.print_first {
315 |         Some(amount_to_cut) => {
316 |             tidied_list.truncate(amount_to_cut);
317 |             tidied_list
318 |         }
319 |         None => tidied_list,
320 |     };
321 |     // And/or can do so randomly
322 |     tidied_list = match req.print_rand {
323 |         Some(amount_to_cut) => {
324 |             let mut rng = rng();
325 |             tidied_list.shuffle(&mut rng);
326 |             tidied_list.truncate(amount_to_cut);
327 |             tidied_list
328 |         }
329 |         None => tidied_list,
330 |     };
331 |     // Finally, sort the list alphabetically, if the user didn't override this default behavior
332 |     if req.sort_alphabetically {
333 |         // We used to just be content to run tidied_list.sort() here,
334 |         // but that doesn't support non-English languages and
335 |         // accented characters very well.
336 | 
337 |         // First, parse the given locale into a valid Locale
338 |         let loc = req.locale.to_string();
339 |         let loc: Locale = loc.parse().expect("Error: Given locale is not parse-able. Try using a form like 'en-US'; do not use underscores.");
340 | 
341 |         // Now use that Locale to sort the list more carefully
342 |         tidied_list = sort_carefully(tidied_list, loc);
343 |     }
344 |     if req.sort_by_length {
345 |         // First, parse the given locale into a valid Locale
346 |         let loc = req.locale.to_string();
347 |         let loc: Locale = loc.parse().expect("Error: Given locale is not parse-able. Try using a form like 'en-US'; do not use underscores.");
348 | 
349 |         eprintln!("Calling sort_by_length");
350 |         tidied_list = sort_by_length(tidied_list, loc);
351 |     }
352 |     // And remove duplicates one more time
353 |     tidied_list = dedup_without_sorting(&mut tidied_list);
354 |     tidied_list
355 | }
356 | 
357 | use unicode_segmentation::UnicodeSegmentation;
358 | /// When counting the characters of a word, we want to count each accented character as 1,
359 | /// regardless of the Unicode, to better approximate how humans would count the number
360 | /// of characters in a word.
361 | /// An alternate approach would be to convert each character to NFC before counting (`word.nfc().count()`),
362 | /// but I don't think this handles emoji as well as grapheme cluster counting does.
363 | pub fn count_characters(word: &str) -> usize {
364 |     word.graphemes(true).count()
365 | }
366 | 
367 | /// Little helper function that allows users to write out the whitespace
368 | /// delimiters "s" and "t", rather than having to enter the whitespace
369 | /// characters literally.
370 | pub fn parse_delimiter(delimiter: char) -> Option<char> {
371 |     if delimiter == 's' {
372 |         Some(' ')
373 |     } else if delimiter == 't' {
374 |         Some('\t')
375 |     } else {
376 |         Some(delimiter)
377 |     }
378 | }
379 | 
380 | /// Used for the to_whittle option
381 | pub fn get_new_starting_point_guess(
382 |     previous_starting_point: usize,
383 |     this_list_length: usize,
384 |     length_to_whittle_to: usize,
385 | ) -> usize {
386 |     let mut starting_point = previous_starting_point;
387 |     if this_list_length > length_to_whittle_to {
388 |         // We're too high!
389 |         let difference = this_list_length - length_to_whittle_to;
390 |         let multiplier = starting_point as f64 / length_to_whittle_to as f64;
391 |         let mut change = (difference as f64 * multiplier).floor() as usize;
392 |         // Edge case we need to catch to avoid an infinite loop
393 |         if change == 0 {
394 |             change = 1;
395 |         }
396 |         starting_point -= change;
397 |     } else {
398 |         // We're too low!
399 |         let difference = length_to_whittle_to - this_list_length;
400 |         let multiplier = starting_point as f64 / length_to_whittle_to as f64;
401 |         let mut change = (difference as f64 * multiplier).floor() as usize;
402 |         // Edge case we need to catch to avoid an infinite loop
403 |         if change == 0 {
404 |             change = 1;
405 |         }
406 |         starting_point += change;
407 |     }
408 |     starting_point
409 | }
--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
1 | use clap::Parser;
2 | use std::env;
3 | use std::path::Path;
4 | use std::path::PathBuf;
5 | use tidy::*;
6 | pub mod display_information;
7 | pub mod input_validations;
8 | use crate::file_readers::*;
9 | use crate::file_writer::*;
10 | use crate::input_validations::*;
11 | use crate::parsers::*;
12 | 
13 | /// Combine and clean word lists
14 | #[derive(Parser, Debug)]
15 | #[clap(version, about, name = "tidy")]
16 | struct Args {
17 |     /// Path(s) for optional list of approved words. Can accept multiple
18 |     /// files.
19 |     #[clap(short = 'a', long = "approve")]
20 |     approved_list: Option<Vec<PathBuf>>,
21 | 
22 |     /// Print attributes about new list to terminal. Can be used more than once
23 |     /// to print more attributes. Some attributes may take a nontrivial amount
24 |     /// of time to calculate.
25 |     #[clap(short = 'A', long = "attributes", action = clap::ArgAction::Count)]
26 |     attributes: u8,
27 | 
28 |     /// Print attributes and word samples in JSON format
29 |     #[clap(short = 'j', long = "json")]
30 |     attributes_as_json: bool,
31 | 
32 |     /// Print playing card abbreviation next to each word.
33 |     /// Strongly recommend only using on lists with lengths that are powers
34 |     /// of 26 (26^1, 26^2, 26^3, etc.)
35 |     #[clap(long = "cards")]
36 |     cards: bool,
37 | 
38 |     /// Debug mode
39 |     #[clap(long = "debug")]
40 |     debug: bool,
41 | 
42 |     /// Delete all characters after the first instance of the specified delimiter until the end of line
43 |     /// (including the delimiter). Delimiter must be a single character (e.g., ','). Use 't' for tab and
44 |     /// 's' for space. May not be used together with -g or -G options.
45 |     #[clap(short = 'd', long = "delete-after")]
46 |     delete_after_delimiter: Option<char>,
47 | 
48 |     /// Delete all characters before and including the first instance of the specified delimiter. Delimiter
49 |     /// must be a single character (e.g., ','). Use 't' for tab and 's' for space. May not be used
50 |     /// together with -g or -G options.
51 |     #[clap(short = 'D', long = "delete-before")]
52 |     delete_before_delimiter: Option<char>,
53 | 
54 |     /// Delete all integers from all words on new list
55 |     #[clap(short = 'i', long = "delete-integers")]
56 |     delete_integers: bool,
57 | 
58 |     /// Delete all non-alphanumeric characters from all words on new list. Characters with diacritics
59 |     /// will remain
60 |     #[clap(short = 'n', long = "delete-nonalphanumeric")]
61 |     delete_nonalphanumeric: bool,
62 | 
63 |     /// Print dice roll before word in output. Set number of sides
64 |     /// of dice. Must be between 2 and 36. Use 6 for normal dice.
65 |     #[clap(long = "dice")]
66 |     dice_sides: Option<u8>,
67 | 
68 |     /// Dry run. Don't write new list to file or terminal.
69 |     #[clap(long = "dry-run")]
70 |     dry_run: bool,
71 | 
72 |     /// Force overwrite of output file if it exists.
73 |     #[clap(short = 'f', long = "force")]
74 |     force_overwrite: bool,
75 | 
76 |     /// Path(s) to file(s) containing homophone pairs. There must be one pair
77 |     /// of homophones per line, separated by a comma (sun,son). If BOTH words
78 |     /// are found on a list, the SECOND word is removed. File(s) can be a CSV
79 |     /// (with no column headers) or TXT file(s).
80 |     #[clap(long = "homophones")]
81 |     homophones_list: Option<Vec<PathBuf>>,
82 | 
83 |     /// Ignore characters after the first instance of the specified delimiter until the end of line, treating
84 |     /// anything before the delimiter as a word. Delimiter must be a single character (e.g., ','). Use 't'
85 |     /// for tab and 's' for space. Helpful for ignoring metadata like word frequencies.
86 |     /// Works with attribute analysis and most word removal options, but not with word modifications
87 |     /// (like to lowercase). May not be used together with -d, -D or -G options.
88 |     #[clap(short = 'g', long = "ignore-after")]
89 |     ignore_after_delimiter: Option<char>,
90 | 
91 |     /// Ignore characters before and including the first instance of the specified delimiter, treating
92 |     /// anything after the delimiter as a word. Delimiter must be a single character (e.g., ','). Use 't'
93 |     /// for tab and 's' for space. Helpful for ignoring metadata like word frequencies.
Helpful for ignoring metadata like word frequencies. 94 | /// Works with attribute analysis and most word removal options, but not with word modifications 95 | /// (like to lowercase). May not be used together with -d, -D or -g options. 96 | #[clap(short = 'G', long = "ignore-before")] 97 | ignore_before_delimiter: Option<char>, 98 | 99 | /// Specify a locale for words on the list. Aids with sorting. Examples: en-US, es-ES. Defaults 100 | /// to system LANG. If LANG environment variable is not set, uses en-US. 101 | #[clap(long = "locale")] 102 | locale: Option<String>, 103 | 104 | /// Lowercase all words on new list 105 | #[clap(short = 'l', long = "lowercase")] 106 | to_lowercase: bool, 107 | 108 | /// Set maximum word length 109 | #[clap(short = 'M', long = "maximum-word-length")] 110 | maximum_length: Option<usize>, 111 | 112 | /// Set number of leading characters to get to a unique prefix, 113 | /// which can aid auto-complete functionality. 114 | /// Setting this value to, say, 4 means that knowing the first 115 | /// 4 characters of any word on the generated list is enough 116 | /// to know which word it is. 117 | #[clap(short = 'x', long = "shared-prefix-length")] 118 | maximum_shared_prefix_length: Option<usize>, 119 | 120 | /// Set minimum edit distance between words, which 121 | /// can reduce the cost of typos when entering words 122 | #[clap(short = 'e', long = "minimum-edit-distance")] 123 | minimum_edit_distance: Option<usize>, 124 | 125 | /// Set minimum word length 126 | #[clap(short = 'm', long = "minimum-word-length")] 127 | minimum_length: Option<usize>, 128 | 129 | /// Sort by word length, with longest words first. First sorts words 130 | /// alphabetically, respecting inputted locale. 131 | #[clap(long = "sort-by-length", conflicts_with = "no_alpha_sort")] 132 | sort_by_length: bool, 133 | 134 | /// Do NOT sort outputted list alphabetically. Preserves original list order. 135 | /// Note that duplicate lines and blank lines will still be removed. 136 | #[clap(short = 'O', long = "no-sort", conflicts_with = "sort_by_length")] 137 | no_alpha_sort: bool, 138 | 139 | /// Normalize Unicode of all characters of all words. Accepts nfc, nfd, nfkc, or nfkd (case 140 | /// insensitive). 141 | #[clap(short = 'z', long = "normalization-form")] 142 | normalization_form: Option<String>, 143 | 144 | /// Path for outputted list file. If none given, generated word list 145 | /// will be printed to terminal. 146 | #[clap(short = 'o', long = "output")] 147 | output: Option<PathBuf>, 148 | 149 | /// When printing dice roll before word in output, print dice values 150 | /// according to the base selected through --dice option. Effectively 151 | /// this means that letters will be used to represent numbers higher 152 | /// than 9. Note that this option also 0-indexes the dice values. 153 | /// This setting defaults to `false`, which will 1-index dice values 154 | /// and use double-digit numbers when necessary (e.g. 18-03-08). 155 | #[clap(long = "sides-as-base")] 156 | print_dice_sides_as_their_base: bool, 157 | 158 | /// Just before printing generated list, cut list down 159 | /// to a set number of words. Can accept expressions in the 160 | /// form of base**exponent (helpful for generating diceware lists). 161 | /// Words are selected from the beginning of the processed list, before it is sorted alphabetically. 162 | #[clap(long = "print-first", value_parser=eval_list_length)] 163 | print_first: Option<usize>, 164 | 165 | /// Just before printing generated list, cut list down 166 | /// to a set number of words.
Can accept expressions in the 167 | /// form of base**exponent (helpful for generating diceware lists). 168 | /// Cuts are done randomly. 169 | #[clap(long = "print-rand", value_parser=eval_list_length)] 170 | print_rand: Option<usize>, 171 | 172 | /// Do not print any extra information 173 | #[clap(long = "quiet")] 174 | quiet: bool, 175 | 176 | /// Remove all words with integers in them from list 177 | #[clap(short = 'I', long = "remove-integers")] 178 | remove_integers: bool, 179 | 180 | /// Remove all words with non-alphanumeric characters from new list. Words with diacritics will 181 | /// remain 182 | #[clap(short = 'N', long = "remove-nonalphanumeric")] 183 | remove_nonalphanumeric: bool, 184 | 185 | /// Remove all words with non-alphabetic characters from new list. Words with diacritics and 186 | /// other non-Latin characters will remain. 187 | #[clap(long = "remove-nonalphabetic")] 188 | remove_nonalphabetic: bool, 189 | 190 | /// Remove all words with any characters not in the Latin alphabet (A through Z and a through 191 | /// z). All words with accented or diacritic characters will be removed, as well as 192 | /// any words with punctuation and internal whitespace. 193 | #[clap(short = 'L', long = "remove-non-latin-alphabetic")] 194 | remove_non_latin_alphabetic: bool, 195 | 196 | /// Remove all words that have any non-ASCII characters from new list 197 | #[clap(short = 'C', long = "remove-nonascii")] 198 | remove_nonascii: bool, 199 | 200 | /// Remove prefix words from new list 201 | #[clap(short = 'P', long = "remove-prefix")] 202 | remove_prefix_words: bool, 203 | 204 | /// Remove suffix words from new list 205 | #[clap(short = 'S', long = "remove-suffix")] 206 | remove_suffix_words: bool, 207 | 208 | /// Path(s) for optional list of words to reject. Can accept multiple 209 | /// files. 210 | #[clap(short = 'r', long = "reject")] 211 | reject_list: Option<Vec<PathBuf>>, 212 | 213 | /// Print a handful of pseudorandomly selected words from the created list 214 | /// to the terminal. Should NOT be used as secure passphrases. 215 | #[clap(short = 's', long = "samples")] 216 | samples: bool, 217 | 218 | /// Use Sardinas-Patterson algorithm to remove words to make list 219 | /// uniquely decodable. Experimental! 220 | #[clap(short = 'K', long = "schlinkert-prune")] 221 | schlinkert_prune: bool, 222 | 223 | /// Skip the first N lines of inputted files. Useful for dealing with headers, like those from 224 | /// PGP signatures 225 | #[clap(long = "skip-rows-start")] 226 | skip_rows_start: Option<usize>, 227 | 228 | /// Skip the last N lines of inputted files. Useful for dealing with footers, like those from 229 | /// PGP signatures. 230 | #[clap(long = "skip-rows-end")] 231 | skip_rows_end: Option<usize>, 232 | 233 | /// Replace “smart” quotation marks, both “double” and ‘single’, 234 | /// with their "straight" versions 235 | #[clap(short = 'q', long = "straighten")] 236 | straighten_quotes: bool, 237 | 238 | /// Only take first N words from inputted word list. If two or more word list files are 239 | /// inputted, it will combine all given lists by alternating words from the given word list 240 | /// files until it has N words 241 | #[clap(long = "take-first", value_parser=eval_list_length)] 242 | take_first: Option<usize>, 243 | 244 | /// Only take a random N number of words from inputted word list. 245 | /// If two or more word lists are inputted, it will 246 | /// combine arbitrarily and then take a random N words.
If you're looking to cut a list exactly 247 | /// to a specified size, consider the print-rand or whittle-to options. 248 | #[clap(long = "take-rand", value_parser=eval_list_length)] 249 | take_rand: Option<usize>, 250 | 251 | /// Whittle list exactly to a specified length, only taking the minimum number of words 252 | /// from the beginning of inputted list(s). 253 | /// If the outputted list is not exactly the specified length, it will try again by taking a 254 | /// different number of words from the input list(s). As a result, using this option may cause 255 | /// Tidy to take a moment to produce the finished list. 256 | /// Can accept expressions in the form of base**exponent (helpful for generating diceware lists). 257 | /// 258 | /// This option should generally only be used if all of the following conditions are met: 259 | /// (a) the inputted word list is sorted by desirability (e.g. ordered by word frequency); 260 | /// (b) the user is either removing prefix words, removing suffix words, or doing a Schlinkert prune; 261 | /// (c) the user needs the resulting list to be a specified length. 262 | /// 263 | /// Optionally can also take a "starting point" after a comma. For example, 264 | /// --whittle-to 7776,15000 would start by taking the first 15,000 words 265 | /// from the inputted list(s) as a first attempt at making a list of 7,776 words, iterating 266 | /// if necessary. 267 | #[clap(short = 'W', long = "whittle-to")] 268 | whittle_to: Option<String>, 269 | 270 | /// Word list input files. Can be more than one, in which case 271 | /// they'll be combined and de-duplicated. Requires at least 272 | /// one file. 273 | #[clap(name = "Inputted Word Lists", required = true)] 274 | inputted_word_lists: Vec<PathBuf>, 275 | } 276 | 277 | fn main() -> Result<(), String> { 278 | let opt = Args::parse(); 279 | if opt.debug { 280 | eprintln!("Received args: {:?}", opt); 281 | } 282 | 283 | // Some initial validations 284 | // Check given number of dice sides 285 | match validate_dice_sides(opt.dice_sides) { 286 | Ok(()) => (), 287 | Err(e) => { 288 | return Err(e.to_string()); 289 | } 290 | } 291 | 292 | // Check if any of inputted_word_lists are directories 293 | for file in &opt.inputted_word_lists { 294 | if file.is_dir() { 295 | return Err(format!("Given file {:?} is a directory", file)); 296 | } 297 | } 298 | 299 | if opt.cards && opt.dice_sides.is_some() { 300 | return Err( 301 | "Error: Cannot use dice and cards. Must be either cards or dice or neither." 302 | .to_string(), 303 | ); 304 | } 305 | 306 | match validate_list_truncation_options( 307 | &opt.whittle_to, 308 | opt.print_rand, 309 | opt.take_first, 310 | opt.take_rand, 311 | ) { 312 | Ok(()) => (), 313 | Err(e) => { 314 | return Err(e.to_string()); 315 | } 316 | } 317 | 318 | // Check if output file exists 319 | if let Some(ref output_file_name) = opt.output { 320 | if !opt.force_overwrite && Path::new(output_file_name).exists() { 321 | return Err( 322 | "Specified output file already exists. Use --force flag to force an overwrite." 323 | .to_string(), 324 | ); 325 | } 326 | } 327 | 328 | // Determine if this is a niche case in which whittle_to would be a smarter choice 329 | // than (either) print_first or print_rand.
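// For example, a hypothetical invocation like
//     tidy -P --print-first 7776 wordlist.txt
// (where wordlist.txt is a stand-in input file) removes prefix words and then
// cuts to 7,776 words -- exactly the kind of run where --whittle-to tends to
// produce a better final list.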
330 | if (opt.print_first.is_some() || opt.print_rand.is_some()) 331 | && opt.whittle_to.is_none() 332 | && (opt.remove_prefix_words || opt.remove_suffix_words || opt.schlinkert_prune) 333 | && !opt.quiet 334 | { 335 | if opt.print_first.is_some() { 336 | eprintln!( 337 | "RECOMMENDATION: If your input list is sorted by desirability (e.g. word frequency), consider using --whittle-to rather than --print-first if you're removing prefix words, removing suffix words, and/or doing a Schlinkert prune.\n" 338 | ); 339 | } 340 | if opt.print_rand.is_some() { 341 | eprintln!( 342 | "RECOMMENDATION: If your input list is sorted by desirability (e.g. word frequency), consider using --whittle-to rather than --print-rand if you're removing prefix words, removing suffix words, and/or doing a Schlinkert prune.\n" 343 | ); 344 | } 345 | } 346 | 347 | // OK let's do this. Make a Tidy request. 348 | // While it's not declared as mutable here, we will reassign 349 | // it later, unfortunately. 350 | let this_tidy_request = TidyRequest { 351 | list: make_vec_from_filenames( 352 | &opt.inputted_word_lists, 353 | opt.skip_rows_start, 354 | opt.skip_rows_end, 355 | ), 356 | take_first: opt.take_first, 357 | take_rand: opt.take_rand, 358 | sort_alphabetically: !opt.no_alpha_sort, 359 | sort_by_length: opt.sort_by_length, 360 | ignore_after_delimiter: opt.ignore_after_delimiter, 361 | ignore_before_delimiter: opt.ignore_before_delimiter, 362 | to_lowercase: opt.to_lowercase, 363 | normalization_form: opt.normalization_form, 364 | locale: match opt.locale { 365 | Some(lang) => lang, 366 | None => match get_system_lang() { 367 | Some(lang) => lang.replace("_", "-"), 368 | None => "en-US".to_string(), 369 | }, 370 | }, 371 | should_straighten_quotes: opt.straighten_quotes, 372 | should_remove_prefix_words: opt.remove_prefix_words, 373 | should_remove_suffix_words: opt.remove_suffix_words, 374 | should_schlinkert_prune: opt.schlinkert_prune, 375 | should_remove_integers: opt.remove_integers, 376 | should_delete_integers: opt.delete_integers, 377 | should_remove_nonalphanumeric: opt.remove_nonalphanumeric, 378 | should_delete_nonalphanumeric: opt.delete_nonalphanumeric, 379 | should_remove_nonalphabetic: opt.remove_nonalphabetic, 380 | should_remove_non_latin_alphabetic: opt.remove_non_latin_alphabetic, 381 | should_remove_nonascii: opt.remove_nonascii, 382 | should_delete_after_first_delimiter: opt.delete_after_delimiter, 383 | should_delete_before_first_delimiter: opt.delete_before_delimiter, 384 | 385 | // If given more than one file of reject words, combine them 386 | // right here.
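// (For instance, passing `-r rejects_a.txt -r rejects_b.txt` -- hypothetical
// file names -- yields a single combined reject list.)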
387 | reject_list: opt 388 | .reject_list 389 | .map(|list_of_files| make_vec_from_filenames(&list_of_files, None, None)), 390 | // Likewise with approved word lists 391 | approved_list: opt 392 | .approved_list 393 | .map(|list_of_files| make_vec_from_filenames(&list_of_files, None, None)), 394 | // And homophones 395 | homophones_list: opt 396 | .homophones_list 397 | .map(|list_of_files| read_homophones_list_from_filenames(&list_of_files)), 398 | minimum_length: opt.minimum_length, 399 | maximum_length: opt.maximum_length, 400 | maximum_shared_prefix_length: opt.maximum_shared_prefix_length, 401 | minimum_edit_distance: opt.minimum_edit_distance, 402 | print_rand: opt.print_rand, 403 | print_first: opt.print_first, 404 | }; 405 | 406 | let (ignore_before_delimiter, ignore_after_delimiter) = match validate_and_parse_ignore_options( 407 | &this_tidy_request, 408 | opt.dice_sides, 409 | opt.print_dice_sides_as_their_base, 410 | ) { 411 | Ok((ignore_before_delimiter, ignore_after_delimiter)) => { 412 | (ignore_before_delimiter, ignore_after_delimiter) 413 | } 414 | Err(e) => { 415 | return Err(e.to_string()); 416 | } 417 | }; 418 | 419 | // Parse provided "whittle string" for a length_to_whittle_to and an 420 | // optional starting point. 421 | let (mut this_tidy_request, length_to_whittle_to, starting_point) = 422 | match parse_whittle_options(this_tidy_request, opt.whittle_to) { 423 | Ok((this_tidy_request, length_to_whittle_to, starting_point)) => { 424 | (this_tidy_request, length_to_whittle_to, starting_point) 425 | } 426 | Err(e) => { 427 | return Err(e); 428 | } 429 | }; 430 | 431 | // Finally get to actually tidy the inputted_word_list 432 | // If we have a length_to_whittle_to and a starting_point, we know we're 433 | // whittling, which is (still) a bit too complicated for my tastes. But we 434 | // need a while loop here. 435 | let mut this_list_length = 0; 436 | let tidied_list = match (length_to_whittle_to, starting_point) { 437 | (Some(our_length_to_whittle_to), Some(mut our_starting_point)) => { 438 | let mut this_tidied_list = vec![]; 439 | while this_list_length != our_length_to_whittle_to { 440 | // Edit this_tidy_request to have our new starting point 441 | this_tidy_request.take_first = Some(our_starting_point); 442 | 443 | // This clone might be too expensive. Maybe tidy_list can take a 444 | // reference? 445 | this_tidied_list = tidy_list(this_tidy_request.clone()); 446 | 447 | this_list_length = this_tidied_list.len(); 448 | our_starting_point = get_new_starting_point_guess( 449 | our_starting_point, 450 | this_list_length, 451 | our_length_to_whittle_to, 452 | ); 453 | if opt.debug { 454 | eprintln!( 455 | "Whittled list to {}. Will try again, taking {} words.", 456 | this_list_length, our_starting_point 457 | ); 458 | } 459 | } 460 | // Out of the loop, which means the list is the user-specified 461 | // length. Return this version of the list. 462 | this_tidied_list 463 | } 464 | (_, _) => { 465 | // In all other cases, the `whittle_to` option was not specified, so 466 | // proceed as normal, sending all parameters in this_tidy_request 467 | // as they are, just once.
468 | tidy_list(this_tidy_request) 469 | } 470 | }; 471 | 472 | // Next, we figure out what to print where 473 | let this_print_request = PrintRequest { 474 | tidied_list, 475 | dry_run: opt.dry_run, 476 | quiet: opt.quiet, 477 | output: opt.output, 478 | cards: opt.cards, 479 | dice_sides: opt.dice_sides, 480 | print_dice_sides_as_their_base: opt.print_dice_sides_as_their_base, 481 | attributes: opt.attributes, 482 | attributes_as_json: opt.attributes_as_json, 483 | samples: opt.samples, 484 | ignore_before_delimiter, 485 | ignore_after_delimiter, 486 | }; 487 | print_list(this_print_request); 488 | 489 | Ok(()) 490 | } 491 | 492 | /// Read the LANG environment variable, if possible 493 | fn get_system_lang() -> Option<String> { 494 | let name_of_environmental_variable = "LANG"; 495 | match env::var(name_of_environmental_variable) { 496 | Ok(l) => Some(l.split('.').collect::<Vec<&str>>()[0].to_string()), 497 | Err(_e) => None, 498 | } 499 | } 500 | -------------------------------------------------------------------------------- /src/display_information/mod.rs: -------------------------------------------------------------------------------- 1 | //! Display attributes and information about the generated word list 2 | 3 | pub mod uniquely_decodable; 4 | use crate::count_characters; 5 | use crate::display_information::uniquely_decodable::is_uniquely_decodable; 6 | use crate::parse_delimiter; 7 | use crate::split_and_vectorize; 8 | use serde::{Deserialize, Serialize}; 9 | use std::fmt; 10 | 11 | #[derive(Serialize, Deserialize)] 12 | pub struct ListAttributes { 13 | pub list_length: usize, 14 | pub mean_word_length: f32, 15 | pub entropy_per_word: f64, 16 | pub shortest_word_length: usize, 17 | pub shortest_word_example: String, 18 | pub longest_word_length: usize, 19 | pub longest_word_example: String, 20 | 21 | pub is_free_of_prefix_words: Option<bool>, 22 | pub is_free_of_suffix_words: Option<bool>, 23 | 24 | pub is_uniquely_decodable: Option<bool>, 25 | 26 | pub efficiency_per_character: f64, 27 | pub assumed_entropy_per_character: f64, 28 | pub is_above_brute_force_line: bool, 29 | pub is_above_shannon_line: bool, 30 | pub shortest_edit_distance: Option<usize>, 31 | pub mean_edit_distance: Option<f64>, 32 | pub longest_shared_prefix: Option<usize>, 33 | pub unique_character_prefix: Option<usize>, 34 | pub kraft_mcmillan: KraftMcmillanOutcome, 35 | pub samples: Option<Vec<String>>, 36 | } 37 | 38 | #[derive(Serialize, Deserialize, Debug)] 39 | pub enum KraftMcmillanOutcome { 40 | Satisfied, 41 | NotSatisfied, 42 | } 43 | impl fmt::Display for KraftMcmillanOutcome { 44 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 45 | match self { 46 | KraftMcmillanOutcome::Satisfied => write!(f, "satisfied"), 47 | KraftMcmillanOutcome::NotSatisfied => write!(f, "not satisfied"), 48 | } 49 | } 50 | } 51 | 52 | fn make_attributes(list: &[String], level: u8, samples: bool) -> ListAttributes { 53 | let samples = if samples { 54 | Some(generate_samples(list)) 55 | } else { 56 | None 57 | }; 58 | let shortest_word_example = list 59 | .iter() 60 | .min_by(|a, b| count_characters(a).cmp(&count_characters(b))) 61 | .unwrap() 62 | .to_string(); 63 | let longest_word_example = list 64 | .iter() 65 | .max_by(|a, b| count_characters(a).cmp(&count_characters(b))) 66 | .unwrap() 67 | .to_string(); 68 | 69 | let is_free_of_prefix_words = if level >= 2 { 70 | Some(!has_prefix_words(list)) 71 | } else { 72 | None 73 | }; 74 | 75 | let is_free_of_suffix_words = if level >= 2 { 76 | Some(!has_suffix_words(list)) 77 | } else { 78 | None 79 | }; 80 | 81 | let is_uniquely_decodable = if level >= 2 { 82 | Some(is_uniquely_decodable(list)) 83 | } else { 84 | None 85 | }; 86 | 87 | let shortest_edit_distance = if level >= 3 { 88 | Some(find_shortest_edit_distance(list)) 89 | } else { 90 | None 91 | }; 92 | let mean_edit_distance = if level >= 3 { 93 | Some(find_mean_edit_distance(list)) 94 | } else { 95 | None 96 | }; 97 | 98 | let longest_shared_prefix = if level >= 4 { 99 | Some(find_longest_shared_prefix( 100 | list, 101 | Some(count_characters(&longest_word_example)), 102 | )) 103 | } else { 104 | None 105 | }; 106 | let unique_character_prefix = if level >= 4 { 107 | // By definition, unique_character_prefix == longest_shared_prefix + 1 108 | // We have to use map in case longest_shared_prefix is None, which is 109 | // unlikely, but technically possible. 110 | longest_shared_prefix.map(|longest_shared_prefix| longest_shared_prefix + 1) 111 | } else { 112 | None 113 | }; 114 | ListAttributes { 115 | list_length: list.len(), 116 | mean_word_length: mean_word_length(list), 117 | entropy_per_word: calc_entropy_per_word(list.len()), 118 | shortest_word_length: count_characters(&shortest_word_example), 119 | shortest_word_example, 120 | longest_word_length: count_characters(&longest_word_example), 121 | longest_word_example, 122 | efficiency_per_character: efficiency_per_character(list), 123 | assumed_entropy_per_character: assumed_entropy_per_character(list), 124 | is_above_brute_force_line: is_above_brute_force_line(list), 125 | is_above_shannon_line: is_above_shannon_line(list), 126 | is_free_of_prefix_words, 127 | is_free_of_suffix_words, 128 | is_uniquely_decodable, 129 | shortest_edit_distance, 130 | mean_edit_distance, 131 | longest_shared_prefix, 132 | unique_character_prefix, 133 | kraft_mcmillan: satisfies_kraft_mcmillan(list), 134 | samples, 135 | } 136 | } 137 | 138 | /// If a user gets a passphrase consisting entirely of the shortest words, 139 | /// it's theoretically possible that we could OVERESTIMATE entropy 140 | /// per word. We can determine if we've done this by comparing our 141 | /// entropy estimate against a simple brute force attack of all lowercase 142 | /// English letters, under which we assume each character adds roughly 4.7 bits of entropy. 143 | /// Note that this slightly obscure method of calculation keeps the exponentiation 144 | /// in integer arithmetic, which helps accuracy. 145 | fn is_above_brute_force_line(list: &[String]) -> bool { 146 | let g: i32 = 26; // roughly: assumed alphabet length 147 | let shortest_word_length = get_shortest_word_length(list) as u32; 148 | let list_length = list.len() as i32; 149 | list_length as f64 <= g.pow(shortest_word_length).into() 150 | } 151 | 152 | /// In 1951, Claude Shannon estimated that English words only have 153 | /// about 2.6 bits of entropy per character, rather than (roughly) 4.7 bits per character. 154 | /// 155 | /// Thus, this is a more difficult line for a given list to pass above than 156 | /// the "brute force" line described above. 157 | fn is_above_shannon_line(list: &[String]) -> bool { 158 | let shortest_word_length = get_shortest_word_length(list) as u32; 159 | let g: f64 = 6.1; // 2**2.6 is 6.1 when we maintain correct number of significant digits. 160 | let list_length = list.len() as i32; 161 | list_length as f64 <= g.powf(shortest_word_length.into()) 162 | } 163 | 164 | /// This is a large and long function that prints all of the attributes of 165 | /// the generated (new) list.
166 | /// 167 | /// We just want to "display" this information, rather than print it to files 168 | /// or stdout, so we use `eprintln!` 169 | pub fn display_list_information( 170 | list: &[String], 171 | level: u8, 172 | attributes_as_json: bool, 173 | ignore_ending_metadata_delimiter: Option<char>, 174 | ignore_starting_metadata_delimiter: Option<char>, 175 | samples: bool, 176 | ) { 177 | let list = make_list_free_of_metadata( 178 | list, 179 | ignore_starting_metadata_delimiter, 180 | ignore_ending_metadata_delimiter, 181 | ); 182 | let list_attributes = make_attributes(&list, level, samples); 183 | if attributes_as_json { 184 | print_attributes_as_json(&list_attributes); 185 | } else { 186 | if level >= 1 { 187 | eprintln!("Attributes of new list"); 188 | eprintln!("----------------------"); 189 | eprintln!( 190 | "List length : {} words", 191 | list_attributes.list_length 192 | ); 193 | eprintln!( 194 | "Mean word length : {:.2} characters", 195 | list_attributes.mean_word_length 196 | ); 197 | eprintln!( 198 | "Length of shortest word : {} characters ({})", 199 | list_attributes.shortest_word_length, list_attributes.shortest_word_example 200 | ); 201 | eprintln!( 202 | "Length of longest word : {} characters ({})", 203 | list_attributes.longest_word_length, list_attributes.longest_word_example 204 | ); 205 | if let Some(is_free_of_prefix_words) = list_attributes.is_free_of_prefix_words { 206 | eprintln!("Free of prefix words? : {}", is_free_of_prefix_words); 207 | } 208 | if let Some(is_free_of_suffix_words) = list_attributes.is_free_of_suffix_words { 209 | eprintln!("Free of suffix words? : {}", is_free_of_suffix_words); 210 | } 211 | 212 | // At least for now, this one is EXPENSIVE 213 | if let Some(is_uniquely_decodable) = list_attributes.is_uniquely_decodable { 214 | eprintln!("Uniquely decodable? : {}", is_uniquely_decodable); 215 | } 216 | 217 | eprintln!( 218 | "Entropy per word : {:.3} bits", 219 | list_attributes.entropy_per_word 220 | ); 221 | eprintln!( 222 | "Efficiency per character : {:.3} bits", 223 | list_attributes.efficiency_per_character 224 | ); 225 | eprintln!( 226 | "Assumed entropy per char : {:.3} bits", 227 | list_attributes.assumed_entropy_per_character 228 | ); 229 | eprintln!( 230 | "Above brute force line? : {}", 231 | list_attributes.is_above_brute_force_line 232 | ); 233 | 234 | if level >= 4 { 235 | eprintln!( 236 | "Above Shannon line? : {}", 237 | list_attributes.is_above_shannon_line 238 | ); 239 | } 240 | 241 | if let Some(shortest_edit_distance) = list_attributes.shortest_edit_distance { 242 | eprintln!("Shortest edit distance : {}", shortest_edit_distance) 243 | } 244 | if let Some(mean_edit_distance) = list_attributes.mean_edit_distance { 245 | eprintln!("Mean edit distance : {:.3}", mean_edit_distance) 246 | } 247 | 248 | if let Some(longest_shared_prefix) = list_attributes.longest_shared_prefix { 249 | eprintln!("Longest shared prefix : {}", longest_shared_prefix) 250 | } 251 | // Number of characters required to definitely get to a unique 252 | // prefix 253 | if let Some(unique_character_prefix) = list_attributes.unique_character_prefix { 254 | eprintln!("Unique character prefix : {}", unique_character_prefix) 255 | } 256 | 257 | if level >= 4 { 258 | eprintln!( 259 | "Kraft-McMillan inequality : {}", 260 | list_attributes.kraft_mcmillan 261 | ); 262 | } 263 | } 264 | if let Some(samples) = list_attributes.samples { 265 | print_samples(samples) 266 | } 267 | } 268 | } 269 | 270 | fn print_attributes_as_json(list_attributes: &ListAttributes) { 271 | let json = serde_json::to_string(&list_attributes).unwrap(); 272 | eprintln!("{}", json); 273 | } 274 | 275 | fn make_list_free_of_metadata( 276 | list: &[String], 277 | ignore_starting_metadata_delimiter: Option<char>, 278 | ignore_ending_metadata_delimiter: Option<char>, 279 | ) -> Vec<String> { 280 | match ( 281 | ignore_starting_metadata_delimiter, 282 | ignore_ending_metadata_delimiter, 283 | ) { 284 | (Some(delimiter), None) => { 285 | let delimiter = parse_delimiter(delimiter).unwrap(); 286 | let mut just_the_words = vec![]; 287 | for word in list { 288 | let split_vec = split_and_vectorize(word, &delimiter.to_string()); 289 | just_the_words.push(split_vec[1].to_string()); 290 | } 291 | just_the_words 292 | } 293 | (None, Some(delimiter)) => { 294 | let delimiter = parse_delimiter(delimiter).unwrap(); 295 | let mut just_the_words = vec![]; 296 | for word in list { 297 | let split_vec = split_and_vectorize(word, &delimiter.to_string()); 298 | just_the_words.push(split_vec[0].to_string()); 299 | } 300 | just_the_words 301 | } 302 | (Some(ref _delimiter1), Some(ref _delimiter2)) => { 303 | panic!("Can't ignore metadata on both sides currently") 304 | } 305 | (None, None) => list.to_vec(), 306 | } 307 | } 308 | 309 | use rand::prelude::IndexedRandom; 310 | /// Pick 30 random words from the newly created word list (these are 311 | /// later displayed as 5 sample 6-word passphrases). 312 | pub fn generate_samples(list: &[String]) -> Vec<String> { 313 | let mut samples: Vec<String> = vec![]; 314 | for _n in 0..30 { 315 | match list.choose(&mut rand::rng()) { 316 | Some(word) => samples.push(word.to_string()), 317 | None => panic!("Couldn't pick a random word"), 318 | } 319 | } 320 | samples 321 | } 322 | 323 | /// Calculate the entropy per word of a word list, given its size. 324 | /// We want this entropy value measured in bits, hence the use 325 | /// of log2() 326 | /// 327 | /// Returns `f64` because this returned value (bits of entropy per 328 | /// word) will most likely not be a whole number (which is fine!) 329 | pub fn calc_entropy_per_word(list_length: usize) -> f64 { 330 | (list_length as f64).log2() 331 | } 332 | 333 | use crate::edit_distance::find_edit_distance; 334 | /// Calculate the shortest edit distance between any two words on the list. 335 | fn find_shortest_edit_distance(list: &[String]) -> usize { 336 | // This use of u32::MAX is smelly, but not sure I know how to do it better.
337 | let mut shortest_edit_distance = u32::MAX; 338 | // Check each unordered pair of words exactly once. (Note that only iterating word1 over half of the list would miss pairs that fall entirely in the other half.) 339 | for (i, word1) in list.iter().enumerate() { 340 | for word2 in list[0..i].iter() { 341 | if word1 != word2 { 342 | let this_edit_distance = find_edit_distance(word1, word2); 343 | if this_edit_distance < shortest_edit_distance { 344 | shortest_edit_distance = this_edit_distance; 345 | } 346 | // If we've found an edit distance of 1, we know that'll be the 347 | // shortest possible (since Tidy removes duplicates by default, so 348 | // a shortest_edit_distance of 0 is NOT possible) 349 | if shortest_edit_distance == 1 { 350 | return 1; 351 | } 352 | } 353 | } 354 | } 355 | shortest_edit_distance.try_into().unwrap() 356 | } 357 | 358 | /// Calculate the mean edit distance between all pairs of words on the list. 359 | pub fn find_mean_edit_distance(list: &[String]) -> f64 { 360 | let mut sum_of_all_edit_distances: f64 = 0.0; 361 | let mut number_of_edit_distances_measured: f64 = 0.0; 362 | for (i, word1) in list.iter().enumerate() { 363 | // The list[0..i] upper-bound in this inner loop is so that we don't do 364 | // twice as many calls as necessary. Otherwise we would be finding the 365 | // edit distance from word1 -> word2 and word2 -> word1. 366 | // This loop also helpfully prevents us from checking a word's edit 367 | // distance to itself (0). 368 | for word2 in list[0..i].iter() { 369 | let this_edit_distance = find_edit_distance(word1, word2); 370 | number_of_edit_distances_measured += 1.0; 371 | sum_of_all_edit_distances += this_edit_distance as f64; 372 | } 373 | } 374 | eprintln!( 375 | "Number of edit distances recorded: {}\nSum of all edit distances: {}", 376 | number_of_edit_distances_measured, sum_of_all_edit_distances 377 | ); 378 | sum_of_all_edit_distances / number_of_edit_distances_measured 379 | } 380 | 381 | /// Nested loops in this function get the `longest_shared_prefix` 382 | /// between any two words on the given list. Returns the length of this 383 | /// longest shared prefix, a notable cryptographic metric. 384 | /// Optionally takes longest_word_length to speed up the process. 385 | pub fn find_longest_shared_prefix(list: &[String], longest_word_length: Option<usize>) -> usize { 386 | let mut longest_shared_prefix = 0; 387 | 388 | // If longest_word_length is given, use that. If not, 389 | // calculate it here. 390 | let longest_word_length = match longest_word_length { 391 | Some(longest_word_length) => longest_word_length, 392 | None => count_characters( 393 | list.iter() 394 | .max_by(|a, b| count_characters(a).cmp(&count_characters(b))) 395 | .unwrap(), 396 | ), 397 | }; 398 | for word1 in list { 399 | for word2 in list { 400 | if word1 != word2 { 401 | // Here we convert from the zero-indexed first difference to 402 | // the (1-indexed) length of the longest shared prefix, so we don't 403 | // need a `- 1`. 404 | let this_shared_prefix_length = 405 | find_first_different_character_zero_indexed(word1, word2); 406 | if this_shared_prefix_length > longest_shared_prefix { 407 | longest_shared_prefix = this_shared_prefix_length; 408 | } 409 | // If we found a shared prefix that's only one fewer than the longest word on 410 | // the list, we know this is the longest shared prefix we'll ever find. 411 | // We can short-circuit return to save time.
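// (For example, if the longest word on the list is "zippy" and the list also
// contains "zipp", their shared prefix of 4 characters is the maximum
// possible, since duplicates have already been removed.)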
412 | if this_shared_prefix_length == longest_word_length - 1 { 413 | return this_shared_prefix_length; 414 | } 415 | } 416 | } 417 | } 418 | longest_shared_prefix 419 | } 420 | 421 | /// Given 2 words, finds the index of the first character that is 422 | /// **different** within them. 423 | /// ``` 424 | /// use tidy::display_information::find_first_different_character_zero_indexed; 425 | /// 426 | /// assert_eq!( 427 | /// find_first_different_character_zero_indexed("hello", "help"), 3 428 | /// // First **different** character is `l` vs. `p`. 429 | /// ); 430 | /// 431 | /// // Handles words of different length by falling back to the length of the shorter 432 | /// // of the two words: 433 | /// assert_eq!( 434 | /// find_first_different_character_zero_indexed("zip", "zippy"), 3 435 | /// ); 436 | /// assert_eq!( 437 | /// find_first_different_character_zero_indexed("zippy", "zip"), 3 438 | /// ); 439 | /// ``` 440 | pub fn find_first_different_character_zero_indexed(word1: &str, word2: &str) -> usize { 441 | for (i, c1) in word1.chars().enumerate() { 442 | match word2.chars().nth(i) { 443 | Some(c2) => { 444 | if c1 != c2 { 445 | return i; 446 | } else { 447 | continue; 448 | } 449 | } 450 | // word1 is longer than word2 451 | None => { 452 | return count_characters(word2); 453 | } 454 | } 455 | } 456 | // Fall back to shorter word length 457 | if count_characters(word1) < count_characters(word2) { 458 | count_characters(word1) 459 | } else { 460 | count_characters(word2) 461 | } 462 | } 463 | 464 | /// Checks if a list has any words that are prefixes of other 465 | /// words on the list. 466 | fn has_prefix_words(list: &[String]) -> bool { 467 | for word1 in list { 468 | for word2 in list { 469 | if word1 != word2 && word1.starts_with(word2) { 470 | return true; 471 | } 472 | } 473 | } 474 | false 475 | } 476 | 477 | /// Checks if a list has any words that are suffixes of other 478 | /// words on the list. 479 | fn has_suffix_words(list: &[String]) -> bool { 480 | for word1 in list { 481 | for word2 in list { 482 | if word1 != word2 && word1.ends_with(word2) { 483 | return true; 484 | } 485 | } 486 | } 487 | false 488 | } 489 | 490 | /// Assuming that users get a passphrase consisting solely of 491 | /// the shortest word on the list, we want to check against 492 | /// a brute-force attack in exactly that situation. To do so, 493 | /// we calculate a value I'm calling "assumed entropy per character". 494 | /// 495 | /// If this value is above `log2(26)` or 4.7 bits, there's a chance 496 | /// that we'd _over_-estimate the entropy of passphrases created 497 | /// using the word list. 498 | pub fn assumed_entropy_per_character(list: &[String]) -> f64 { 499 | let shortest_word_length = get_shortest_word_length(list) as f64; 500 | let assumed_entropy_per_word = calc_entropy_per_word(list.len()); 501 | 502 | assumed_entropy_per_word / shortest_word_length 503 | } 504 | 505 | /// Calculates the "efficiency" of the list. 506 | /// Basically this is the number of bits of entropy generated by 507 | /// the AVERAGE character. Thus it is different from 508 | /// `assumed_entropy_per_word`, which you can think of as 509 | /// the "worst case scenario" (user getting only words of the SHORTEST 510 | /// length in their passphrase).
511 | pub fn efficiency_per_character(list: &[String]) -> f64 { 512 | let mean_word_length = mean_word_length(list) as f64; 513 | let entropy_per_word = calc_entropy_per_word(list.len()); 514 | 515 | entropy_per_word / mean_word_length 516 | } 517 | 518 | /// This function returns an outcome based on whether the list satisfies 519 | /// the Kraft-McMillan inequality. 520 | /// See: <https://en.wikipedia.org/wiki/Kraft%E2%80%93McMillan_inequality> 521 | pub fn satisfies_kraft_mcmillan(list: &[String]) -> KraftMcmillanOutcome { 522 | let alphabet_size = count_unique_characters(list); 523 | let mut running_total: f64 = 0.0; 524 | for word in list { 525 | running_total += 526 | 1.0 / (alphabet_size.pow(count_characters(word).try_into().unwrap()) as f64); 527 | } 528 | if running_total <= 1.0 { 529 | KraftMcmillanOutcome::Satisfied 530 | } else { 531 | KraftMcmillanOutcome::NotSatisfied 532 | } 533 | } 534 | 535 | /// Helper function for calculating the Kraft-McMillan inequality 536 | fn count_unique_characters(list: &[String]) -> usize { 537 | let mut characters = vec![]; 538 | for word in list { 539 | for l in word.chars() { 540 | characters.push(l); 541 | } 542 | } 543 | characters.sort(); 544 | characters.dedup(); 545 | characters.len() 546 | } 547 | 548 | /// A simple helper function that gets the length of the shortest word on 549 | /// a list. 550 | pub fn get_shortest_word_length(list: &[String]) -> usize { 551 | count_characters( 552 | list.iter() 553 | .min_by(|a, b| count_characters(a).cmp(&count_characters(b))) 554 | .unwrap(), 555 | ) 556 | } 557 | 558 | /// Calculates the mean (or average) word length of a given word 559 | /// list 560 | pub fn mean_word_length(list: &[String]) -> f32 { 561 | list.iter() 562 | .map(|word| count_characters(word)) 563 | .sum::<usize>() as f32 564 | / list.len() as f32 565 | } 566 | 567 | fn print_samples(samples: Vec<String>) { 568 | eprintln!("\nWord samples"); 569 | eprintln!("------------"); 570 | for n in 0..30 { 571 | if n != 0 && n % 6 == 0 { 572 | // if we're at the end of the 6th word, 573 | // print a newline 574 | eprintln!(); 575 | } else if n != 0 { 576 | // else just print a space to go between each 577 | // word 578 | eprint!(" "); 579 | } 580 | eprint!("{}", samples[n]); 581 | } 582 | eprintln!(); 583 | } 584 | -------------------------------------------------------------------------------- /tests/list_manipulation_tests.rs: -------------------------------------------------------------------------------- 1 | mod list_manipulation_tests { 2 | use tidy::dice::print_as_dice; // not exactly sure why I need this here...
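// (print_as_dice is exercised by the dice-roll tests at the bottom of this module.)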
3 | use tidy::list_manipulations::reverse_all_words; 4 | use tidy::*; 5 | 6 | fn make_lists() -> (Vec<String>, Vec<String>, Vec<String>, Vec<String>) { 7 | ( 8 | vec![ 9 | " zookeeper", 10 | "cHarLie", 11 | "keeper", 12 | "app", 13 | "tea", 14 | "addiction", 15 | "zoo", 16 | "keeper", 17 | "stationary ", 18 | "tea", 19 | "station", 20 | "apple", 21 | "sécréter", 22 | "séc", 23 | "actor", 24 | ] 25 | .iter() 26 | .map(|x| x.to_string()) 27 | .collect(), 28 | vec![ 29 | " wizard ", 30 | "ardoR", 31 | "tea", 32 | "11225 active", 33 | " ", 34 | "11152 acclaim", 35 | "its456", 36 | "11156 word tabs", 37 | "19-6-8 clad", 38 | "be", 39 | "I", 40 | "vAcation", 41 | "take", 42 | "world999", 43 | "", 44 | "mistake", 45 | "tee", 46 | "post-modern", 47 | "13910 word with spaces in it", 48 | "comma,203478", 49 | "“smart”", 50 | "‘quotes’", 51 | " h as spaces ", 52 | ] 53 | .iter() 54 | .map(|x| x.to_string()) 55 | .collect(), 56 | vec![ 57 | "Normal", 58 | "the,2048", 59 | "اج 12", 60 | "11225 tab", 61 | "11152 space", 62 | "11156 word tabs", 63 | "word-with-hypens", 64 | "Uppercase", 65 | "hello109823", 66 | " ", 67 | "", 68 | "13910 word with spaces in it", 69 | "comma,203478", 70 | "京", 71 | "can't", 72 | "\"dumb quotes\"", 73 | ] 74 | .iter() 75 | .map(|x| x.to_string()) 76 | .collect(), 77 | vec![ 78 | "énigme", "enlever", "abbey", "zoo", "Zambia", "eager", "ezra", "año", "antena", 79 | "anaconda", "aptitude", 80 | ] 81 | .iter() 82 | .map(|word| word.to_string()) 83 | .collect(), 84 | ) 85 | } 86 | 87 | #[test] 88 | fn can_remove_duplicate_words() { 89 | let this_tidy_request = TidyRequest { 90 | list: make_lists().0, 91 | ..Default::default() 92 | }; 93 | let new_list = tidy_list(this_tidy_request); 94 | assert!(new_list.contains(&"tea".to_string())); 95 | assert!(new_list.contains(&"apple".to_string())); 96 | assert!(new_list.len() == make_lists().0.len() - 2); 97 | } 98 | 99 | #[test] 100 | fn can_sort_words_alphabetically() { 101 | let this_tidy_request = TidyRequest { 102 | list: make_lists().0, 103 | sort_alphabetically: true, 104 | locale: "en-US".to_string(), 105 | ..Default::default() 106 | }; 107 | let new_list = tidy_list(this_tidy_request); 108 | assert!(new_list[0] == "actor".to_string()); 109 | assert!(new_list.contains(&"station".to_string())); 110 | assert!(new_list[new_list.len() - 1] == "zookeeper".to_string()); 111 | } 112 | 113 | #[test] 114 | fn respect_option_to_not_sort_alphabetically() { 115 | let this_tidy_request = TidyRequest { 116 | list: make_lists().0, 117 | sort_alphabetically: false, 118 | ..Default::default() 119 | }; 120 | let new_list = tidy_list(this_tidy_request); 121 | assert!(new_list[0] == "zookeeper".to_string()); 122 | assert!(new_list.contains(&"apple".to_string())); 123 | assert_eq!(new_list[new_list.len() - 4], "apple".to_string()); 124 | } 125 | 126 | #[test] 127 | fn can_sort_by_length() { 128 | let this_tidy_request = TidyRequest { 129 | list: make_lists().0, 130 | sort_by_length: true, 131 | locale: "en-US".to_string(), 132 | ..Default::default() 133 | }; 134 | let new_list = tidy_list(this_tidy_request); 135 | assert!(new_list[0] == "stationary".to_string()); 136 | assert!(new_list[1] == "addiction".to_string()); 137 | } 138 | 139 | #[test] 140 | fn removes_blank_lines() { 141 | let this_tidy_request = TidyRequest { 142 | list: make_lists().1, 143 | ..Default::default() 144 | }; 145 | let new_list = tidy_list(this_tidy_request); 146 | assert!(new_list.len() == make_lists().1.len() - 2); 147 | } 148 | 149 | #[test] 150 | fn can_take_first_4_elements() { 151 | let
this_tidy_request = TidyRequest { 152 | list: make_lists().1, 153 | take_first: Some(4), 154 | ..Default::default() 155 | }; 156 | let new_list = tidy_list(this_tidy_request); 157 | println!("List length now {}: {:?}", new_list.len(), new_list); 158 | assert_eq!(new_list.len(), 4); 159 | assert_ne!(new_list.len(), 3); 160 | assert_ne!(new_list.len(), 15); 161 | } 162 | 163 | #[test] 164 | fn removes_starting_and_trailing_whitespace() { 165 | let this_tidy_request = TidyRequest { 166 | list: make_lists().1, 167 | ..Default::default() 168 | }; 169 | let new_list = tidy_list(this_tidy_request); 170 | assert!(new_list.contains(&"wizard".to_string())); 171 | } 172 | 173 | #[test] 174 | fn does_not_remove_inner_spaces() { 175 | let this_tidy_request = TidyRequest { 176 | list: make_lists().1, 177 | ..Default::default() 178 | }; 179 | let new_list = tidy_list(this_tidy_request); 180 | assert!(new_list.contains(&"h as spaces".to_string())); 181 | } 182 | 183 | #[test] 184 | fn can_straighten_quotes() { 185 | let this_tidy_request = TidyRequest { 186 | list: make_lists().1, 187 | should_straighten_quotes: true, 188 | ..Default::default() 189 | }; 190 | let new_list = tidy_list(this_tidy_request); 191 | assert!(new_list.contains(&"\"smart\"".to_string())); 192 | assert!(new_list.contains(&"'quotes'".to_string())); 193 | } 194 | #[test] 195 | fn can_delete_integers_from_words() { 196 | let this_tidy_request = TidyRequest { 197 | list: make_lists().1, 198 | should_delete_integers: true, 199 | ..Default::default() 200 | }; 201 | let new_list = tidy_list(this_tidy_request); 202 | assert!(new_list.contains(&"active".to_string())); 203 | } 204 | 205 | #[test] 206 | fn can_delete_nonalphanumeric_from_words() { 207 | let this_tidy_request = TidyRequest { 208 | list: make_lists().1, 209 | should_delete_nonalphanumeric: true, 210 | ..Default::default() 211 | }; 212 | let new_list = tidy_list(this_tidy_request); 213 | assert!(new_list.contains(&"1968clad".to_string())); 214 | assert!(new_list.contains(&"take".to_string())); 215 | } 216 | 217 | #[test] 218 | fn can_remove_nonalphanumeric_words_from_list() { 219 | let this_tidy_request = TidyRequest { 220 | list: make_lists().2, 221 | should_remove_nonalphanumeric: true, 222 | ..Default::default() 223 | }; 224 | let new_list = tidy_list(this_tidy_request); 225 | 226 | assert!(new_list.contains(&"Uppercase".to_string())); 227 | assert!(new_list.contains(&"京".to_string())); 228 | assert!(new_list.contains(&"hello109823".to_string())); 229 | assert!(!new_list.contains(&"word-with-hypens".to_string())); 230 | assert!(!new_list.contains(&"comma,203478".to_string())); 231 | assert!(!new_list.contains(&"اج 12".to_string())); 232 | } 233 | 234 | #[test] 235 | fn can_remove_nonalphabetic_words_from_list() { 236 | let this_tidy_request = TidyRequest { 237 | list: make_lists().2, 238 | should_remove_nonalphabetic: true, 239 | ..Default::default() 240 | }; 241 | let new_list = tidy_list(this_tidy_request); 242 | 243 | assert!(new_list.contains(&"Uppercase".to_string())); 244 | assert!(new_list.contains(&"京".to_string())); 245 | assert!(!new_list.contains(&"hello109823".to_string())); 246 | assert!(!new_list.contains(&"word-with-hypens".to_string())); 247 | assert!(!new_list.contains(&"comma,203478".to_string())); 248 | assert!(!new_list.contains(&"اج 12".to_string())); 249 | } 250 | #[test] 251 | fn can_remove_non_latin_alphabetic_words_from_list() { 252 | let this_tidy_request = TidyRequest { 253 | list: make_lists().2, 254 | should_remove_non_latin_alphabetic: true, 255 | 
..Default::default() 256 | }; 257 | let new_list = tidy_list(this_tidy_request); 258 | 259 | assert!(new_list.contains(&"Uppercase".to_string())); 260 | assert!(!new_list.contains(&"京".to_string())); 261 | assert!(!new_list.contains(&"hello109823".to_string())); 262 | assert!(!new_list.contains(&"word-with-hypens".to_string())); 263 | assert!(!new_list.contains(&"comma,203478".to_string())); 264 | assert!(!new_list.contains(&"اج 12".to_string())); 265 | } 266 | 267 | #[test] 268 | fn can_remove_non_ascii_words_from_list() { 269 | let this_tidy_request = TidyRequest { 270 | list: make_lists().2, 271 | should_remove_nonascii: true, 272 | ..Default::default() 273 | }; 274 | let new_list = tidy_list(this_tidy_request); 275 | 276 | assert!(new_list.contains(&"Uppercase".to_string())); 277 | assert!(new_list.contains(&"hello109823".to_string())); 278 | assert!(new_list.contains(&"word-with-hypens".to_string())); 279 | assert!(new_list.contains(&"comma,203478".to_string())); 280 | assert!(!new_list.contains(&"京".to_string())); 281 | assert!(!new_list.contains(&"اج 12".to_string())); 282 | } 283 | 284 | #[test] 285 | fn can_delete_before_first_tab() { 286 | let this_tidy_request = TidyRequest { 287 | list: make_lists().1, 288 | should_delete_before_first_delimiter: Some('\t'), 289 | ..Default::default() 290 | }; 291 | let new_list = tidy_list(this_tidy_request); 292 | assert!(new_list.contains(&"active".to_string())); 293 | assert!(new_list.contains(&"acclaim".to_string())); 294 | // Only remove through FIRST tab 295 | assert!(new_list.contains(&"word\ttabs".to_string())); 296 | } 297 | 298 | #[test] 299 | fn can_delete_before_first_space() { 300 | let this_tidy_request = TidyRequest { 301 | list: make_lists().1, 302 | should_delete_before_first_delimiter: Some(' '), 303 | ..Default::default() 304 | }; 305 | let new_list = tidy_list(this_tidy_request); 306 | assert!(new_list.contains(&"clad".to_string())); 307 | // Check that it only removes characters through first space, rather than just 308 | // between first space and second space, for example 309 | assert!(new_list.contains(&"word with spaces in it".to_string())); 310 | // Tidy trims leading whitespace first, so the "h" 311 | // will be cut here. 
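// (That is: " h as spaces " -> trimmed to "h as spaces" -> delete through
// first space -> "as spaces".)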
312 | assert!(new_list.contains(&"as spaces".to_string())); 313 | } 314 | #[test] 315 | fn can_delete_before_first_comma() { 316 | let this_tidy_request = TidyRequest { 317 | list: make_lists().1, 318 | should_delete_before_first_delimiter: Some(','), 319 | ..Default::default() 320 | }; 321 | let new_list = tidy_list(this_tidy_request); 322 | assert!(new_list.contains(&"203478".to_string())); 323 | assert!(new_list.contains(&"h as spaces".to_string())); 324 | } 325 | 326 | #[test] 327 | fn can_delete_after_first_tab() { 328 | let this_tidy_request = TidyRequest { 329 | list: make_lists().1, 330 | should_delete_after_first_delimiter: Some('\t'), 331 | ..Default::default() 332 | }; 333 | let new_list = tidy_list(this_tidy_request); 334 | assert!(new_list.contains(&"11225".to_string())); 335 | assert!(new_list.contains(&"11152".to_string())); 336 | // remove after FIRST tab 337 | assert!(new_list.contains(&"11156".to_string())); 338 | } 339 | #[test] 340 | fn can_delete_after_first_space() { 341 | let this_tidy_request = TidyRequest { 342 | list: make_lists().1, 343 | should_delete_after_first_delimiter: Some(' '), 344 | ..Default::default() 345 | }; 346 | let new_list = tidy_list(this_tidy_request); 347 | assert!(new_list.contains(&"19-6-8".to_string())); 348 | assert!(new_list.contains(&"13910".to_string())); 349 | assert!(new_list.contains(&"post-modern".to_string())); 350 | assert!(new_list.contains(&"comma,203478".to_string())); 351 | } 352 | #[test] 353 | fn can_delete_after_first_comma() { 354 | let this_tidy_request = TidyRequest { 355 | list: make_lists().1, 356 | should_delete_after_first_delimiter: Some(','), 357 | ..Default::default() 358 | }; 359 | let new_list = tidy_list(this_tidy_request); 360 | assert!(new_list.contains(&"comma".to_string())); 361 | assert!(new_list.contains(&"h as spaces".to_string())); 362 | } 363 | 364 | #[test] 365 | fn can_lowercase_words() { 366 | let this_tidy_request = TidyRequest { 367 | list: make_lists().0, 368 | to_lowercase: true, 369 | ..Default::default() 370 | }; 371 | let new_list = tidy_list(this_tidy_request); 372 | assert!(new_list.contains(&"charlie".to_string())); 373 | let this_tidy_request = TidyRequest { 374 | list: make_lists().1, 375 | to_lowercase: true, 376 | ..Default::default() 377 | }; 378 | let new_list = tidy_list(this_tidy_request); 379 | assert!(new_list.contains(&"vacation".to_string())); 380 | assert!(new_list.contains(&"ardor".to_string())); 381 | } 382 | 383 | #[test] 384 | fn can_remove_prefix_words() { 385 | let this_tidy_request = TidyRequest { 386 | list: make_lists().0, 387 | should_remove_prefix_words: true, 388 | ..Default::default() 389 | }; 390 | let new_list = tidy_list(this_tidy_request); 391 | assert!(!new_list.contains(&"station".to_string())); 392 | assert!(new_list.contains(&"stationary".to_string())); 393 | assert!(!new_list.contains(&"zoo".to_string())); 394 | assert!(new_list.contains(&"zookeeper".to_string())); 395 | assert!(new_list.contains(&"apple".to_string())); 396 | } 397 | 398 | #[test] 399 | fn can_remove_a_prefix_word_that_has_accents() { 400 | let this_tidy_request = TidyRequest { 401 | list: make_lists().0, 402 | should_remove_prefix_words: true, 403 | ..Default::default() 404 | }; 405 | let new_list = tidy_list(this_tidy_request); 406 | assert!(!new_list.contains(&"séc".to_string())); 407 | assert!(new_list.contains(&"sécréter".to_string())); 408 | } 409 | 410 | #[test] 411 | fn can_remove_suffix_words() { 412 | let this_tidy_request = TidyRequest { 413 | list: make_lists().0, 414 | 
should_remove_suffix_words: true, 415 | ..Default::default() 416 | }; 417 | let new_list = tidy_list(this_tidy_request); 418 | assert!(!new_list.contains(&"keeper".to_string())); 419 | assert!(new_list.contains(&"apple".to_string())); 420 | } 421 | 422 | #[test] 423 | fn can_remove_words_with_nonalphanumeric_characters() { 424 | let this_tidy_request = TidyRequest { 425 | list: make_lists().1, 426 | should_remove_nonalphanumeric: true, 427 | ..Default::default() 428 | }; 429 | let new_list = tidy_list(this_tidy_request); 430 | assert!(!new_list.contains(&"19-6-8 clad".to_string())); 431 | assert!(new_list.contains(&"world999".to_string())); 432 | assert!(new_list.contains(&"take".to_string())); 433 | } 434 | 435 | #[test] 436 | fn can_remove_words_with_nonalphabetic_characters() { 437 | let this_tidy_request = TidyRequest { 438 | list: make_lists().1, 439 | should_remove_nonalphabetic: true, 440 | ..Default::default() 441 | }; 442 | let new_list = tidy_list(this_tidy_request); 443 | assert!(!new_list.contains(&"19-6-8 clad".to_string())); 444 | assert!(!new_list.contains(&"world999".to_string())); 445 | assert!(!new_list.contains(&"world".to_string())); 446 | assert!(!new_list.contains(&"post-modern".to_string())); 447 | assert!(!new_list.contains(&"postmodern".to_string())); 448 | assert!(new_list.contains(&"take".to_string())); 449 | assert!(new_list.contains(&"wizard".to_string())); 450 | assert!(new_list.contains(&"vAcation".to_string())); 451 | } 452 | 453 | #[test] 454 | fn can_remove_words_with_integers() { 455 | let this_tidy_request = TidyRequest { 456 | list: make_lists().1, 457 | should_remove_integers: true, 458 | ..Default::default() 459 | }; 460 | let new_list = tidy_list(this_tidy_request); 461 | assert!(!new_list.contains(&"19-6-8 clad".to_string())); 462 | assert!(!new_list.contains(&"world999".to_string())); 463 | assert!(new_list.contains(&"be".to_string())); 464 | assert!(new_list.contains(&"I".to_string())); 465 | } 466 | 467 | #[test] 468 | fn can_remove_words_shorter_than_a_specified_minimum_length() { 469 | let this_tidy_request = TidyRequest { 470 | list: make_lists().1, 471 | minimum_length: Some(3), 472 | ..Default::default() 473 | }; 474 | let new_list = tidy_list(this_tidy_request); 475 | assert!(!new_list.contains(&"I".to_string())); 476 | assert!(!new_list.contains(&"be".to_string())); 477 | assert!(new_list.contains(&"tea".to_string())); 478 | assert!(new_list.contains(&"mistake".to_string())); 479 | } 480 | 481 | #[test] 482 | fn can_remove_words_outside_a_specified_minimum_and_maximum_length() { 483 | let this_tidy_request = TidyRequest { 484 | list: make_lists().0, 485 | minimum_length: Some(4), 486 | maximum_length: Some(7), 487 | ..Default::default() 488 | }; 489 | let new_list = tidy_list(this_tidy_request); 490 | assert!(!new_list.contains(&"addiction".to_string())); 491 | assert!(!new_list.contains(&"zookeeper".to_string())); 492 | assert!(!new_list.contains(&"stationary".to_string())); 493 | assert!(!new_list.contains(&"its".to_string())); 494 | assert!(!new_list.contains(&"its456".to_string())); 495 | assert!(!new_list.contains(&"tea".to_string())); 496 | assert!(new_list.contains(&"station".to_string())); 497 | } 498 | 499 | #[test] 500 | fn can_remove_words_longer_than_a_specified_maximum_length_after_deleting_integers() { 501 | let this_tidy_request = TidyRequest { 502 | list: make_lists().1, 503 | should_delete_integers: true, 504 | maximum_length: Some(7), 505 | ..Default::default() 506 | }; 507 | let new_list =
tidy_list(this_tidy_request); 508 | assert!(new_list.contains(&"active".to_string())); 509 | assert!(new_list.contains(&"acclaim".to_string())); 510 | assert!(!new_list.contains(&"word with spaces in it".to_string())); 511 | } 512 | 513 | #[test] 514 | fn can_guarantee_a_maximum_length_of_shared_prefix_for_autocomplete() { 515 | let this_tidy_request = TidyRequest { 516 | list: make_lists().0, 517 | maximum_shared_prefix_length: Some(3), 518 | ..Default::default() 519 | }; 520 | let new_list = tidy_list(this_tidy_request); 521 | assert!(new_list.contains(&"zoo".to_string())); 522 | assert!(!new_list.contains(&"zookeeper".to_string())); 523 | assert!(new_list.contains(&"station".to_string())); 524 | assert!(!new_list.contains(&"stationary".to_string())); 525 | assert!(new_list.contains(&"app".to_string())); 526 | assert!(!new_list.contains(&"apple".to_string())); 527 | } 528 | 529 | #[test] 530 | fn can_remove_reject_words() { 531 | let words_to_reject: Vec<String> = vec!["mistake", "carnival"] 532 | .iter() 533 | .map(|x| x.to_string()) 534 | .collect(); 535 | 536 | let this_tidy_request = TidyRequest { 537 | list: make_lists().1, 538 | reject_list: Some(words_to_reject), 539 | to_lowercase: true, 540 | ..Default::default() 541 | }; 542 | let new_list = tidy_list(this_tidy_request); 543 | assert!(!new_list.contains(&"mistake".to_string())); 544 | assert!(!new_list.contains(&"carnival".to_string())); 545 | assert!(new_list.contains(&"wizard".to_string())); 546 | } 547 | 548 | #[test] 549 | fn can_remove_all_words_not_on_approved_list() { 550 | let approved_words: Vec<String> = vec!["take", "vAcation", "airplane"] 551 | .iter() 552 | .map(|x| x.to_string()) 553 | .collect(); 554 | 555 | let this_tidy_request = TidyRequest { 556 | list: make_lists().1, 557 | approved_list: Some(approved_words), 558 | ..Default::default() 559 | }; 560 | let new_list = tidy_list(this_tidy_request); 561 | assert!(new_list.contains(&"take".to_string())); 562 | assert!(new_list.contains(&"vAcation".to_string())); 563 | assert!(!new_list.contains(&"carnival".to_string())); 564 | assert!(!new_list.contains(&"wizard".to_string())); 565 | assert!(!new_list.contains(&"airplane".to_string())); 566 | } 567 | 568 | #[test] 569 | fn can_remove_specified_homophones() { 570 | let homophone1 = ("be".to_string(), "bee".to_string()); 571 | let homophone2 = ("right".to_string(), "write".to_string()); 572 | let homophone3 = ("tea".to_string(), "tee".to_string()); 573 | let this_tidy_request = TidyRequest { 574 | list: make_lists().1, 575 | homophones_list: Some(vec![homophone1, homophone2, homophone3]), 576 | to_lowercase: true, 577 | ..Default::default() 578 | }; 579 | let new_list = tidy_list(this_tidy_request); 580 | assert!(new_list.contains(&"tea".to_string())); 581 | assert!(!new_list.contains(&"tee".to_string())); 582 | assert!(new_list.contains(&"be".to_string())); 583 | assert!(!new_list.contains(&"bee".to_string())); 584 | assert!(new_list.contains(&"mistake".to_string())); 585 | } 586 | 587 | #[test] 588 | fn can_sort_accented_and_capitalized_letters_properly() { 589 | let this_tidy_request = TidyRequest { 590 | list: make_lists().3, 591 | sort_alphabetically: true, 592 | locale: "es-ES".to_string(), 593 | normalization_form: Some("nfkd".to_string()), 594 | ..Default::default() 595 | }; 596 | let new_list = tidy_list(this_tidy_request); 597 | 598 | let how_list_should_be_sorted: Vec<String> = vec![ 599 | "abbey", 600 | "anaconda", 601 | "antena", 602 | "año", 603 | "aptitude", 604 | "eager", 605 | &normalize_unicode("énigme",
"nfkd").unwrap(), 606 | "enlever", 607 | "ezra", 608 | "Zambia", 609 | "zoo", 610 | ] 611 | .iter() 612 | .map(|word| word.to_string()) 613 | .collect(); 614 | assert_eq!(new_list, how_list_should_be_sorted); 615 | } 616 | 617 | // this is really a WORD manipulation, so maybe should be in a 618 | // different test file 619 | use tidy::list_manipulations::normalize_unicode; 620 | #[test] 621 | fn can_normalize_unicode_in_a_given_word() { 622 | let word_with_combined_accents = "sécréter"; 623 | let word_with_two_char_accents = "sécréter"; 624 | assert_eq!( 625 | word_with_combined_accents, 626 | normalize_unicode(word_with_combined_accents, "nfc").unwrap() 627 | ); 628 | assert_eq!( 629 | word_with_combined_accents, 630 | normalize_unicode(word_with_combined_accents, "nfkc").unwrap() 631 | ); 632 | assert_eq!( 633 | word_with_two_char_accents, 634 | normalize_unicode(word_with_two_char_accents, "nfd").unwrap() 635 | ); 636 | assert_eq!( 637 | word_with_two_char_accents, 638 | normalize_unicode(word_with_two_char_accents, "nfkd").unwrap() 639 | ); 640 | } 641 | #[test] 642 | fn can_accurately_count_characters() { 643 | let normal_word = "normal"; 644 | assert_eq!(count_characters(normal_word), 6); 645 | 646 | // These two words below seem the same, don't they? 647 | let word_with_combined_accents = "sécréter"; 648 | let word_with_two_char_accents = "sécréter"; 649 | 650 | // Oh, you sweet summer child... 651 | assert_ne!( 652 | word_with_combined_accents.chars().count(), 653 | word_with_two_char_accents.chars().count() 654 | ); 655 | // Hence, my count_characters function, which normalizes 656 | // Unicopde via NFC before counting the length of given string slice 657 | // I chose NFC because it seems to be closest to how human read/count 658 | // letters (e.g. and accented e always counts as 1 character). 
659 | assert_eq!(count_characters(word_with_combined_accents), 8); 660 | assert_eq!(count_characters(word_with_two_char_accents), 8); 661 | 662 | let emojis = "😀😃😄😁😆"; 663 | assert_eq!(count_characters(emojis), 5); 664 | } 665 | 666 | #[test] 667 | fn can_accurately_count_characters_of_nfc_and_nfkd_normalized_words() { 668 | let word_with_combined_accents = "sécréter"; 669 | let word_with_two_char_accents = "sécréter"; 670 | assert_eq!( 671 | normalize_unicode(word_with_combined_accents, "nfc") 672 | .unwrap() 673 | .chars() 674 | .count(), 675 | normalize_unicode(word_with_two_char_accents, "nfc") 676 | .unwrap() 677 | .chars() 678 | .count() 679 | ); 680 | assert_eq!( 681 | normalize_unicode(word_with_combined_accents, "nfkd") 682 | .unwrap() 683 | .chars() 684 | .count(), 685 | normalize_unicode(word_with_two_char_accents, "nfkd") 686 | .unwrap() 687 | .chars() 688 | .count() 689 | ); 690 | } 691 | 692 | #[test] 693 | fn can_reverse_list() { 694 | let list = vec![ 695 | "hotdog".to_string(), 696 | "hamburger".to_string(), 697 | "alligator".to_string(), 698 | "😀😁😆".to_string(), 699 | ]; 700 | let rev_list = reverse_all_words(&list); 701 | assert_eq!(rev_list, ["godtoh", "regrubmah", "rotagilla", "😆😁😀"]); 702 | } 703 | // Note: print_as_dice's final bool appears to toggle between 1-indexed die faces (false) and zero-based digits, with letters above 9 (true); the zero-padding width is derived from the list length. 704 | #[test] 705 | fn can_print_dice_rolls_of_base_6() { 706 | assert_eq!(print_as_dice(0, 6, 7776, false), "11111".to_string()); 707 | assert_eq!(print_as_dice(7775, 6, 7776, false), "66666".to_string()); 708 | assert_eq!(print_as_dice(2548, 6, 7776, false), "26555".to_string()); 709 | assert_eq!(print_as_dice(2548, 6, 7000, false), "26555".to_string()); 710 | } 711 | #[test] 712 | fn can_print_dice_rolls_of_base_2() { 713 | assert_eq!(print_as_dice(1, 2, 7776, true), "0000000000001".to_string()); 714 | assert_eq!(print_as_dice(127, 2, 128, true), "1111111".to_string()); 715 | } 716 | #[test] 717 | fn can_print_dice_rolls_of_base_20() { 718 | assert_eq!(print_as_dice(1000, 20, 8000, false), "03-11-01".to_string()); 719 | assert_eq!(print_as_dice(1000, 20, 8000, true), "2A0".to_string()); 720 | } 721 | } 722 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 4 4 | 5 | [[package]] 6 | name = "anstream" 7 | version = "0.6.21" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" 10 | dependencies = [ 11 | "anstyle", 12 | "anstyle-parse", 13 | "anstyle-query", 14 | "anstyle-wincon", 15 | "colorchoice", 16 | "is_terminal_polyfill", 17 | "utf8parse", 18 | ] 19 | 20 | [[package]] 21 | name = "anstyle" 22 | version = "1.0.13" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" 25 | 26 | [[package]] 27 | name = "anstyle-parse" 28 | version = "0.2.7" 29 | source = "registry+https://github.com/rust-lang/crates.io-index" 30 | checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" 31 | dependencies = [ 32 | "utf8parse", 33 | ] 34 | 35 | [[package]] 36 | name = "anstyle-query" 37 | version = "1.1.5" 38 | source = "registry+https://github.com/rust-lang/crates.io-index" 39 | checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" 40 | dependencies = [ 41 | "windows-sys", 42 | ] 43 | 44 | [[package]] 45 | name = "anstyle-wincon" 46 | version = "3.0.11" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" 49 | dependencies = [ 50 | "anstyle", 51 | "once_cell_polyfill", 52 | "windows-sys", 53 | ] 54 | 55 | [[package]] 56 | name = "autocfg" 57 | version = "1.5.0" 58 | source = "registry+https://github.com/rust-lang/crates.io-index" 59 | checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" 60 | 61 | [[package]] 62 | name = "calendrical_calculations" 63 | version = "0.2.3" 64 | source = "registry+https://github.com/rust-lang/crates.io-index" 65 | checksum = "3a0b39595c6ee54a8d0900204ba4c401d0ab4eb45adaf07178e8d017541529e7" 66 | dependencies = [ 67 | "core_maths", 68 | "displaydoc", 69 | ] 70 | 71 | [[package]] 72 | name = "cfg-if" 73 | version = "1.0.4" 74 | source = "registry+https://github.com/rust-lang/crates.io-index" 75 | checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" 76 | 77 | [[package]] 78 | name = "clap" 79 | version = "4.5.53" 80 | source = "registry+https://github.com/rust-lang/crates.io-index" 81 | checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8" 82 | dependencies = [ 83 | "clap_builder", 84 | "clap_derive", 85 | ] 86 | 87 | [[package]] 88 | name = "clap_builder" 89 | version = "4.5.53" 90 | source = "registry+https://github.com/rust-lang/crates.io-index" 91 | checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00" 92 | dependencies = [ 93 | "anstream", 94 | "anstyle", 95 | "clap_lex", 96 | "strsim", 97 | ] 98 | 99 | [[package]] 100 | name = "clap_derive" 101 | version = "4.5.49" 102 | source = "registry+https://github.com/rust-lang/crates.io-index" 103 | checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" 104 | dependencies = [ 105 | "heck", 106 | "proc-macro2", 107 | "quote", 108 | "syn", 109 | ] 110 | 111 | [[package]] 112 | name = "clap_lex" 113 | version = "0.7.6" 114 | source = "registry+https://github.com/rust-lang/crates.io-index" 115 | checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" 116 | 117 | [[package]] 118 | name = "colorchoice" 119 | version = "1.0.4" 120 | source = "registry+https://github.com/rust-lang/crates.io-index" 121 | 
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" 122 | 123 | [[package]] 124 | name = "core_maths" 125 | version = "0.1.1" 126 | source = "registry+https://github.com/rust-lang/crates.io-index" 127 | checksum = "77745e017f5edba1a9c1d854f6f3a52dac8a12dd5af5d2f54aecf61e43d80d30" 128 | dependencies = [ 129 | "libm", 130 | ] 131 | 132 | [[package]] 133 | name = "displaydoc" 134 | version = "0.2.5" 135 | source = "registry+https://github.com/rust-lang/crates.io-index" 136 | checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" 137 | dependencies = [ 138 | "proc-macro2", 139 | "quote", 140 | "syn", 141 | ] 142 | 143 | [[package]] 144 | name = "either" 145 | version = "1.15.0" 146 | source = "registry+https://github.com/rust-lang/crates.io-index" 147 | checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" 148 | 149 | [[package]] 150 | name = "fixed_decimal" 151 | version = "0.7.1" 152 | source = "registry+https://github.com/rust-lang/crates.io-index" 153 | checksum = "35eabf480f94d69182677e37571d3be065822acfafd12f2f085db44fbbcc8e57" 154 | dependencies = [ 155 | "displaydoc", 156 | "smallvec", 157 | "writeable", 158 | ] 159 | 160 | [[package]] 161 | name = "getrandom" 162 | version = "0.3.4" 163 | source = "registry+https://github.com/rust-lang/crates.io-index" 164 | checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" 165 | dependencies = [ 166 | "cfg-if", 167 | "libc", 168 | "r-efi", 169 | "wasip2", 170 | ] 171 | 172 | [[package]] 173 | name = "heck" 174 | version = "0.5.0" 175 | source = "registry+https://github.com/rust-lang/crates.io-index" 176 | checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" 177 | 178 | [[package]] 179 | name = "icu" 180 | version = "2.1.1" 181 | source = "registry+https://github.com/rust-lang/crates.io-index" 182 | checksum = "67ab713dd86fa032cb5487f9ac3a85d47b5dcf4c7b8c7dd00210b3cadd6a6551" 183 | dependencies = [ 184 | "icu_calendar", 185 | "icu_casemap", 186 | "icu_collator", 187 | "icu_collections", 188 | "icu_datetime", 189 | "icu_decimal", 190 | "icu_experimental", 191 | "icu_list", 192 | "icu_locale", 193 | "icu_normalizer", 194 | "icu_pattern", 195 | "icu_plurals", 196 | "icu_properties", 197 | "icu_provider", 198 | "icu_segmenter", 199 | "icu_time", 200 | ] 201 | 202 | [[package]] 203 | name = "icu_calendar" 204 | version = "2.1.1" 205 | source = "registry+https://github.com/rust-lang/crates.io-index" 206 | checksum = "d6f0e52e009b6b16ba9c0693578796f2dd4aaa59a7f8f920423706714a89ac4e" 207 | dependencies = [ 208 | "calendrical_calculations", 209 | "displaydoc", 210 | "icu_calendar_data", 211 | "icu_locale", 212 | "icu_locale_core", 213 | "icu_provider", 214 | "ixdtf", 215 | "serde", 216 | "tinystr", 217 | "zerovec", 218 | ] 219 | 220 | [[package]] 221 | name = "icu_calendar_data" 222 | version = "2.1.1" 223 | source = "registry+https://github.com/rust-lang/crates.io-index" 224 | checksum = "527f04223b17edfe0bd43baf14a0cb1b017830db65f3950dc00224860a9a446d" 225 | 226 | [[package]] 227 | name = "icu_casemap" 228 | version = "2.1.1" 229 | source = "registry+https://github.com/rust-lang/crates.io-index" 230 | checksum = "d4ca9983e8bf51223c2f89014fa4eaa9e9b336c47f3af0d000538f86f841fba1" 231 | dependencies = [ 232 | "icu_casemap_data", 233 | "icu_collections", 234 | "icu_locale_core", 235 | "icu_properties", 236 | "icu_provider", 237 | "potential_utf", 238 | "writeable", 239 | "zerovec", 240 | ] 241 | 242 | [[package]] 243 | name = 
"icu_casemap_data" 244 | version = "2.1.1" 245 | source = "registry+https://github.com/rust-lang/crates.io-index" 246 | checksum = "98d4663d0f99b301033a19e0acf94e9d2fa4b107638580165e5a6ccc49ad1450" 247 | 248 | [[package]] 249 | name = "icu_collator" 250 | version = "2.1.1" 251 | source = "registry+https://github.com/rust-lang/crates.io-index" 252 | checksum = "32eed11a5572f1088b63fa21dc2e70d4a865e5739fc2d10abc05be93bae97019" 253 | dependencies = [ 254 | "icu_collator_data", 255 | "icu_collections", 256 | "icu_locale", 257 | "icu_locale_core", 258 | "icu_normalizer", 259 | "icu_properties", 260 | "icu_provider", 261 | "smallvec", 262 | "utf16_iter", 263 | "utf8_iter", 264 | "zerovec", 265 | ] 266 | 267 | [[package]] 268 | name = "icu_collator_data" 269 | version = "2.1.1" 270 | source = "registry+https://github.com/rust-lang/crates.io-index" 271 | checksum = "5ab06f0e83a613efddba3e4913e00e43ed4001fae651cb7d40fc7e66b83b6fb9" 272 | 273 | [[package]] 274 | name = "icu_collections" 275 | version = "2.1.1" 276 | source = "registry+https://github.com/rust-lang/crates.io-index" 277 | checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" 278 | dependencies = [ 279 | "displaydoc", 280 | "potential_utf", 281 | "serde", 282 | "yoke", 283 | "zerofrom", 284 | "zerovec", 285 | ] 286 | 287 | [[package]] 288 | name = "icu_datetime" 289 | version = "2.1.1" 290 | source = "registry+https://github.com/rust-lang/crates.io-index" 291 | checksum = "1b9d49f41ded8e63761b6b4c3120dfdc289415a1ed10107db6198eb311057ca5" 292 | dependencies = [ 293 | "displaydoc", 294 | "fixed_decimal", 295 | "icu_calendar", 296 | "icu_datetime_data", 297 | "icu_decimal", 298 | "icu_locale", 299 | "icu_locale_core", 300 | "icu_pattern", 301 | "icu_plurals", 302 | "icu_provider", 303 | "icu_time", 304 | "potential_utf", 305 | "tinystr", 306 | "writeable", 307 | "zerovec", 308 | ] 309 | 310 | [[package]] 311 | name = "icu_datetime_data" 312 | version = "2.1.1" 313 | source = "registry+https://github.com/rust-lang/crates.io-index" 314 | checksum = "0bf2a384725c67fcd32d27737bc7ba9dc5fe21311dfe3ba530f4b4d53e72bacc" 315 | 316 | [[package]] 317 | name = "icu_decimal" 318 | version = "2.1.1" 319 | source = "registry+https://github.com/rust-lang/crates.io-index" 320 | checksum = "a38c52231bc348f9b982c1868a2af3195199623007ba2c7650f432038f5b3e8e" 321 | dependencies = [ 322 | "fixed_decimal", 323 | "icu_decimal_data", 324 | "icu_locale", 325 | "icu_locale_core", 326 | "icu_provider", 327 | "serde", 328 | "writeable", 329 | "zerovec", 330 | ] 331 | 332 | [[package]] 333 | name = "icu_decimal_data" 334 | version = "2.1.1" 335 | source = "registry+https://github.com/rust-lang/crates.io-index" 336 | checksum = "2905b4044eab2dd848fe84199f9195567b63ab3a93094711501363f63546fef7" 337 | 338 | [[package]] 339 | name = "icu_experimental" 340 | version = "0.4.0" 341 | source = "registry+https://github.com/rust-lang/crates.io-index" 342 | checksum = "f4ffa4d60b9cb8b024082afaf9e94d853184e483ec69322c74dc437bf8a882a5" 343 | dependencies = [ 344 | "displaydoc", 345 | "either", 346 | "fixed_decimal", 347 | "icu_casemap", 348 | "icu_collections", 349 | "icu_decimal", 350 | "icu_experimental_data", 351 | "icu_list", 352 | "icu_locale", 353 | "icu_locale_core", 354 | "icu_normalizer", 355 | "icu_pattern", 356 | "icu_plurals", 357 | "icu_properties", 358 | "icu_provider", 359 | "litemap", 360 | "num-bigint", 361 | "num-rational", 362 | "num-traits", 363 | "potential_utf", 364 | "smallvec", 365 | "tinystr", 366 | "writeable", 367 | "zerotrie", 368 
| "zerovec", 369 | ] 370 | 371 | [[package]] 372 | name = "icu_experimental_data" 373 | version = "0.4.0" 374 | source = "registry+https://github.com/rust-lang/crates.io-index" 375 | checksum = "2578ea93f0373bb28800f7d1100e7e771c4d248d0d3759250fed08fa27694139" 376 | 377 | [[package]] 378 | name = "icu_list" 379 | version = "2.1.1" 380 | source = "registry+https://github.com/rust-lang/crates.io-index" 381 | checksum = "d3a0b7b126e2fc42777d3c348611553d540bd3683caa39b387c5dd1036bb21a8" 382 | dependencies = [ 383 | "icu_list_data", 384 | "icu_locale", 385 | "icu_provider", 386 | "regex-automata", 387 | "serde", 388 | "writeable", 389 | "zerovec", 390 | ] 391 | 392 | [[package]] 393 | name = "icu_list_data" 394 | version = "2.1.1" 395 | source = "registry+https://github.com/rust-lang/crates.io-index" 396 | checksum = "51044c242fe2a882cc0a464314bbdb9f441556a1cb238fb527fc47355ec2827b" 397 | 398 | [[package]] 399 | name = "icu_locale" 400 | version = "2.1.1" 401 | source = "registry+https://github.com/rust-lang/crates.io-index" 402 | checksum = "532b11722e350ab6bf916ba6eb0efe3ee54b932666afec989465f9243fe6dd60" 403 | dependencies = [ 404 | "icu_collections", 405 | "icu_locale_core", 406 | "icu_locale_data", 407 | "icu_provider", 408 | "potential_utf", 409 | "tinystr", 410 | "zerovec", 411 | ] 412 | 413 | [[package]] 414 | name = "icu_locale_core" 415 | version = "2.1.1" 416 | source = "registry+https://github.com/rust-lang/crates.io-index" 417 | checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" 418 | dependencies = [ 419 | "displaydoc", 420 | "litemap", 421 | "serde", 422 | "tinystr", 423 | "writeable", 424 | "zerovec", 425 | ] 426 | 427 | [[package]] 428 | name = "icu_locale_data" 429 | version = "2.1.1" 430 | source = "registry+https://github.com/rust-lang/crates.io-index" 431 | checksum = "f03e2fcaefecdf05619f3d6f91740e79ab969b4dd54f77cbf546b1d0d28e3147" 432 | 433 | [[package]] 434 | name = "icu_normalizer" 435 | version = "2.1.1" 436 | source = "registry+https://github.com/rust-lang/crates.io-index" 437 | checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" 438 | dependencies = [ 439 | "icu_collections", 440 | "icu_normalizer_data", 441 | "icu_properties", 442 | "icu_provider", 443 | "smallvec", 444 | "utf16_iter", 445 | "utf8_iter", 446 | "write16", 447 | "zerovec", 448 | ] 449 | 450 | [[package]] 451 | name = "icu_normalizer_data" 452 | version = "2.1.1" 453 | source = "registry+https://github.com/rust-lang/crates.io-index" 454 | checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" 455 | 456 | [[package]] 457 | name = "icu_pattern" 458 | version = "0.4.1" 459 | source = "registry+https://github.com/rust-lang/crates.io-index" 460 | checksum = "7a7ff8c0ff6f61cdce299dcb54f557b0a251adbc78f6f0c35a21332c452b4a1b" 461 | dependencies = [ 462 | "displaydoc", 463 | "either", 464 | "serde", 465 | "writeable", 466 | "yoke", 467 | "zerovec", 468 | ] 469 | 470 | [[package]] 471 | name = "icu_plurals" 472 | version = "2.1.1" 473 | source = "registry+https://github.com/rust-lang/crates.io-index" 474 | checksum = "4f9cfe49f5b1d1163cc58db451562339916a9ca5cbcaae83924d41a0bf839474" 475 | dependencies = [ 476 | "fixed_decimal", 477 | "icu_locale", 478 | "icu_plurals_data", 479 | "icu_provider", 480 | "zerovec", 481 | ] 482 | 483 | [[package]] 484 | name = "icu_plurals_data" 485 | version = "2.1.1" 486 | source = "registry+https://github.com/rust-lang/crates.io-index" 487 | checksum = 
"f018a98dccf7f0eb02ba06ac0ff67d102d8ded80734724305e924de304e12ff0" 488 | 489 | [[package]] 490 | name = "icu_properties" 491 | version = "2.1.1" 492 | source = "registry+https://github.com/rust-lang/crates.io-index" 493 | checksum = "e93fcd3157766c0c8da2f8cff6ce651a31f0810eaa1c51ec363ef790bbb5fb99" 494 | dependencies = [ 495 | "icu_collections", 496 | "icu_locale_core", 497 | "icu_properties_data", 498 | "icu_provider", 499 | "serde", 500 | "zerotrie", 501 | "zerovec", 502 | ] 503 | 504 | [[package]] 505 | name = "icu_properties_data" 506 | version = "2.1.1" 507 | source = "registry+https://github.com/rust-lang/crates.io-index" 508 | checksum = "02845b3647bb045f1100ecd6480ff52f34c35f82d9880e029d329c21d1054899" 509 | 510 | [[package]] 511 | name = "icu_provider" 512 | version = "2.1.1" 513 | source = "registry+https://github.com/rust-lang/crates.io-index" 514 | checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" 515 | dependencies = [ 516 | "displaydoc", 517 | "icu_locale_core", 518 | "serde", 519 | "stable_deref_trait", 520 | "writeable", 521 | "yoke", 522 | "zerofrom", 523 | "zerotrie", 524 | "zerovec", 525 | ] 526 | 527 | [[package]] 528 | name = "icu_segmenter" 529 | version = "2.1.1" 530 | source = "registry+https://github.com/rust-lang/crates.io-index" 531 | checksum = "43da5e7e9b540df15e53ca27f69b50e36e01b652584b40b3335ed65d18303834" 532 | dependencies = [ 533 | "core_maths", 534 | "icu_collections", 535 | "icu_locale", 536 | "icu_provider", 537 | "icu_segmenter_data", 538 | "potential_utf", 539 | "utf8_iter", 540 | "zerovec", 541 | ] 542 | 543 | [[package]] 544 | name = "icu_segmenter_data" 545 | version = "2.1.1" 546 | source = "registry+https://github.com/rust-lang/crates.io-index" 547 | checksum = "6ebbb7321d9e21d25f5660366cb6c08201d0175898a3a6f7a41ee9685af21c80" 548 | 549 | [[package]] 550 | name = "icu_time" 551 | version = "2.1.1" 552 | source = "registry+https://github.com/rust-lang/crates.io-index" 553 | checksum = "8242b00da3b3b6678f731437a11c8833a43c821ae081eca60ba1b7579d45b6d8" 554 | dependencies = [ 555 | "calendrical_calculations", 556 | "displaydoc", 557 | "icu_calendar", 558 | "icu_locale_core", 559 | "icu_provider", 560 | "icu_time_data", 561 | "ixdtf", 562 | "serde", 563 | "zerotrie", 564 | "zerovec", 565 | ] 566 | 567 | [[package]] 568 | name = "icu_time_data" 569 | version = "2.1.1" 570 | source = "registry+https://github.com/rust-lang/crates.io-index" 571 | checksum = "3e10b0e5e87a2c84bd5fa407705732052edebe69291d347d0c3033785470edbf" 572 | 573 | [[package]] 574 | name = "is_terminal_polyfill" 575 | version = "1.70.2" 576 | source = "registry+https://github.com/rust-lang/crates.io-index" 577 | checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" 578 | 579 | [[package]] 580 | name = "itertools" 581 | version = "0.14.0" 582 | source = "registry+https://github.com/rust-lang/crates.io-index" 583 | checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" 584 | dependencies = [ 585 | "either", 586 | ] 587 | 588 | [[package]] 589 | name = "itoa" 590 | version = "1.0.15" 591 | source = "registry+https://github.com/rust-lang/crates.io-index" 592 | checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" 593 | 594 | [[package]] 595 | name = "ixdtf" 596 | version = "0.6.4" 597 | source = "registry+https://github.com/rust-lang/crates.io-index" 598 | checksum = "84de9d95a6d2547d9b77ee3f25fa0ee32e3c3a6484d47a55adebc0439c077992" 599 | 600 | [[package]] 601 | name = "libc" 602 | version = 
"0.2.178" 603 | source = "registry+https://github.com/rust-lang/crates.io-index" 604 | checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" 605 | 606 | [[package]] 607 | name = "libm" 608 | version = "0.2.15" 609 | source = "registry+https://github.com/rust-lang/crates.io-index" 610 | checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" 611 | 612 | [[package]] 613 | name = "litemap" 614 | version = "0.8.1" 615 | source = "registry+https://github.com/rust-lang/crates.io-index" 616 | checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" 617 | 618 | [[package]] 619 | name = "memchr" 620 | version = "2.7.6" 621 | source = "registry+https://github.com/rust-lang/crates.io-index" 622 | checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" 623 | 624 | [[package]] 625 | name = "num-bigint" 626 | version = "0.4.6" 627 | source = "registry+https://github.com/rust-lang/crates.io-index" 628 | checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" 629 | dependencies = [ 630 | "num-integer", 631 | "num-traits", 632 | ] 633 | 634 | [[package]] 635 | name = "num-integer" 636 | version = "0.1.46" 637 | source = "registry+https://github.com/rust-lang/crates.io-index" 638 | checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" 639 | dependencies = [ 640 | "num-traits", 641 | ] 642 | 643 | [[package]] 644 | name = "num-rational" 645 | version = "0.4.2" 646 | source = "registry+https://github.com/rust-lang/crates.io-index" 647 | checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" 648 | dependencies = [ 649 | "num-bigint", 650 | "num-integer", 651 | "num-traits", 652 | ] 653 | 654 | [[package]] 655 | name = "num-traits" 656 | version = "0.2.19" 657 | source = "registry+https://github.com/rust-lang/crates.io-index" 658 | checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" 659 | dependencies = [ 660 | "autocfg", 661 | ] 662 | 663 | [[package]] 664 | name = "once_cell_polyfill" 665 | version = "1.70.2" 666 | source = "registry+https://github.com/rust-lang/crates.io-index" 667 | checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" 668 | 669 | [[package]] 670 | name = "potential_utf" 671 | version = "0.1.4" 672 | source = "registry+https://github.com/rust-lang/crates.io-index" 673 | checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" 674 | dependencies = [ 675 | "serde_core", 676 | "writeable", 677 | "zerovec", 678 | ] 679 | 680 | [[package]] 681 | name = "ppv-lite86" 682 | version = "0.2.21" 683 | source = "registry+https://github.com/rust-lang/crates.io-index" 684 | checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" 685 | dependencies = [ 686 | "zerocopy", 687 | ] 688 | 689 | [[package]] 690 | name = "proc-macro2" 691 | version = "1.0.103" 692 | source = "registry+https://github.com/rust-lang/crates.io-index" 693 | checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" 694 | dependencies = [ 695 | "unicode-ident", 696 | ] 697 | 698 | [[package]] 699 | name = "quote" 700 | version = "1.0.42" 701 | source = "registry+https://github.com/rust-lang/crates.io-index" 702 | checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" 703 | dependencies = [ 704 | "proc-macro2", 705 | ] 706 | 707 | [[package]] 708 | name = "r-efi" 709 | version = "5.3.0" 710 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 711 | checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" 712 | 713 | [[package]] 714 | name = "radix_fmt" 715 | version = "1.0.0" 716 | source = "registry+https://github.com/rust-lang/crates.io-index" 717 | checksum = "ce082a9940a7ace2ad4a8b7d0b1eac6aa378895f18be598230c5f2284ac05426" 718 | 719 | [[package]] 720 | name = "rand" 721 | version = "0.9.2" 722 | source = "registry+https://github.com/rust-lang/crates.io-index" 723 | checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" 724 | dependencies = [ 725 | "rand_chacha", 726 | "rand_core", 727 | ] 728 | 729 | [[package]] 730 | name = "rand_chacha" 731 | version = "0.9.0" 732 | source = "registry+https://github.com/rust-lang/crates.io-index" 733 | checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" 734 | dependencies = [ 735 | "ppv-lite86", 736 | "rand_core", 737 | ] 738 | 739 | [[package]] 740 | name = "rand_core" 741 | version = "0.9.3" 742 | source = "registry+https://github.com/rust-lang/crates.io-index" 743 | checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" 744 | dependencies = [ 745 | "getrandom", 746 | ] 747 | 748 | [[package]] 749 | name = "regex-automata" 750 | version = "0.4.13" 751 | source = "registry+https://github.com/rust-lang/crates.io-index" 752 | checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" 753 | 754 | [[package]] 755 | name = "ryu" 756 | version = "1.0.20" 757 | source = "registry+https://github.com/rust-lang/crates.io-index" 758 | checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" 759 | 760 | [[package]] 761 | name = "serde" 762 | version = "1.0.228" 763 | source = "registry+https://github.com/rust-lang/crates.io-index" 764 | checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" 765 | dependencies = [ 766 | "serde_core", 767 | "serde_derive", 768 | ] 769 | 770 | [[package]] 771 | name = "serde_core" 772 | version = "1.0.228" 773 | source = "registry+https://github.com/rust-lang/crates.io-index" 774 | checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" 775 | dependencies = [ 776 | "serde_derive", 777 | ] 778 | 779 | [[package]] 780 | name = "serde_derive" 781 | version = "1.0.228" 782 | source = "registry+https://github.com/rust-lang/crates.io-index" 783 | checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" 784 | dependencies = [ 785 | "proc-macro2", 786 | "quote", 787 | "syn", 788 | ] 789 | 790 | [[package]] 791 | name = "serde_json" 792 | version = "1.0.145" 793 | source = "registry+https://github.com/rust-lang/crates.io-index" 794 | checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" 795 | dependencies = [ 796 | "itoa", 797 | "memchr", 798 | "ryu", 799 | "serde", 800 | "serde_core", 801 | ] 802 | 803 | [[package]] 804 | name = "smallvec" 805 | version = "1.15.1" 806 | source = "registry+https://github.com/rust-lang/crates.io-index" 807 | checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" 808 | 809 | [[package]] 810 | name = "stable_deref_trait" 811 | version = "1.2.1" 812 | source = "registry+https://github.com/rust-lang/crates.io-index" 813 | checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" 814 | 815 | [[package]] 816 | name = "strsim" 817 | version = "0.11.1" 818 | source = "registry+https://github.com/rust-lang/crates.io-index" 819 | checksum 
= "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" 820 | 821 | [[package]] 822 | name = "syn" 823 | version = "2.0.111" 824 | source = "registry+https://github.com/rust-lang/crates.io-index" 825 | checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" 826 | dependencies = [ 827 | "proc-macro2", 828 | "quote", 829 | "unicode-ident", 830 | ] 831 | 832 | [[package]] 833 | name = "synstructure" 834 | version = "0.13.2" 835 | source = "registry+https://github.com/rust-lang/crates.io-index" 836 | checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" 837 | dependencies = [ 838 | "proc-macro2", 839 | "quote", 840 | "syn", 841 | ] 842 | 843 | [[package]] 844 | name = "tidy" 845 | version = "0.3.17" 846 | dependencies = [ 847 | "clap", 848 | "icu", 849 | "itertools", 850 | "memchr", 851 | "radix_fmt", 852 | "rand", 853 | "serde", 854 | "serde_json", 855 | "unicode-normalization", 856 | "unicode-segmentation", 857 | ] 858 | 859 | [[package]] 860 | name = "tinystr" 861 | version = "0.8.2" 862 | source = "registry+https://github.com/rust-lang/crates.io-index" 863 | checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" 864 | dependencies = [ 865 | "displaydoc", 866 | "serde_core", 867 | "zerovec", 868 | ] 869 | 870 | [[package]] 871 | name = "tinyvec" 872 | version = "1.10.0" 873 | source = "registry+https://github.com/rust-lang/crates.io-index" 874 | checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" 875 | dependencies = [ 876 | "tinyvec_macros", 877 | ] 878 | 879 | [[package]] 880 | name = "tinyvec_macros" 881 | version = "0.1.1" 882 | source = "registry+https://github.com/rust-lang/crates.io-index" 883 | checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" 884 | 885 | [[package]] 886 | name = "unicode-ident" 887 | version = "1.0.22" 888 | source = "registry+https://github.com/rust-lang/crates.io-index" 889 | checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" 890 | 891 | [[package]] 892 | name = "unicode-normalization" 893 | version = "0.1.25" 894 | source = "registry+https://github.com/rust-lang/crates.io-index" 895 | checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" 896 | dependencies = [ 897 | "tinyvec", 898 | ] 899 | 900 | [[package]] 901 | name = "unicode-segmentation" 902 | version = "1.12.0" 903 | source = "registry+https://github.com/rust-lang/crates.io-index" 904 | checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" 905 | 906 | [[package]] 907 | name = "utf16_iter" 908 | version = "1.0.5" 909 | source = "registry+https://github.com/rust-lang/crates.io-index" 910 | checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" 911 | 912 | [[package]] 913 | name = "utf8_iter" 914 | version = "1.0.4" 915 | source = "registry+https://github.com/rust-lang/crates.io-index" 916 | checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" 917 | 918 | [[package]] 919 | name = "utf8parse" 920 | version = "0.2.2" 921 | source = "registry+https://github.com/rust-lang/crates.io-index" 922 | checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" 923 | 924 | [[package]] 925 | name = "wasip2" 926 | version = "1.0.1+wasi-0.2.4" 927 | source = "registry+https://github.com/rust-lang/crates.io-index" 928 | checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" 929 | dependencies = [ 930 | "wit-bindgen", 931 | ] 932 
| 933 | [[package]] 934 | name = "windows-link" 935 | version = "0.2.1" 936 | source = "registry+https://github.com/rust-lang/crates.io-index" 937 | checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" 938 | 939 | [[package]] 940 | name = "windows-sys" 941 | version = "0.61.2" 942 | source = "registry+https://github.com/rust-lang/crates.io-index" 943 | checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" 944 | dependencies = [ 945 | "windows-link", 946 | ] 947 | 948 | [[package]] 949 | name = "wit-bindgen" 950 | version = "0.46.0" 951 | source = "registry+https://github.com/rust-lang/crates.io-index" 952 | checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" 953 | 954 | [[package]] 955 | name = "write16" 956 | version = "1.0.0" 957 | source = "registry+https://github.com/rust-lang/crates.io-index" 958 | checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" 959 | 960 | [[package]] 961 | name = "writeable" 962 | version = "0.6.2" 963 | source = "registry+https://github.com/rust-lang/crates.io-index" 964 | checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" 965 | dependencies = [ 966 | "either", 967 | ] 968 | 969 | [[package]] 970 | name = "yoke" 971 | version = "0.8.1" 972 | source = "registry+https://github.com/rust-lang/crates.io-index" 973 | checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" 974 | dependencies = [ 975 | "stable_deref_trait", 976 | "yoke-derive", 977 | "zerofrom", 978 | ] 979 | 980 | [[package]] 981 | name = "yoke-derive" 982 | version = "0.8.1" 983 | source = "registry+https://github.com/rust-lang/crates.io-index" 984 | checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" 985 | dependencies = [ 986 | "proc-macro2", 987 | "quote", 988 | "syn", 989 | "synstructure", 990 | ] 991 | 992 | [[package]] 993 | name = "zerocopy" 994 | version = "0.8.31" 995 | source = "registry+https://github.com/rust-lang/crates.io-index" 996 | checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" 997 | dependencies = [ 998 | "zerocopy-derive", 999 | ] 1000 | 1001 | [[package]] 1002 | name = "zerocopy-derive" 1003 | version = "0.8.31" 1004 | source = "registry+https://github.com/rust-lang/crates.io-index" 1005 | checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" 1006 | dependencies = [ 1007 | "proc-macro2", 1008 | "quote", 1009 | "syn", 1010 | ] 1011 | 1012 | [[package]] 1013 | name = "zerofrom" 1014 | version = "0.1.6" 1015 | source = "registry+https://github.com/rust-lang/crates.io-index" 1016 | checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" 1017 | dependencies = [ 1018 | "zerofrom-derive", 1019 | ] 1020 | 1021 | [[package]] 1022 | name = "zerofrom-derive" 1023 | version = "0.1.6" 1024 | source = "registry+https://github.com/rust-lang/crates.io-index" 1025 | checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" 1026 | dependencies = [ 1027 | "proc-macro2", 1028 | "quote", 1029 | "syn", 1030 | "synstructure", 1031 | ] 1032 | 1033 | [[package]] 1034 | name = "zerotrie" 1035 | version = "0.2.3" 1036 | source = "registry+https://github.com/rust-lang/crates.io-index" 1037 | checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" 1038 | dependencies = [ 1039 | "displaydoc", 1040 | "yoke", 1041 | "zerofrom", 1042 | ] 1043 | 1044 | [[package]] 1045 | name = "zerovec" 1046 | version = "0.11.5" 1047 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 1048 | checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" 1049 | dependencies = [ 1050 | "serde", 1051 | "yoke", 1052 | "zerofrom", 1053 | "zerovec-derive", 1054 | ] 1055 | 1056 | [[package]] 1057 | name = "zerovec-derive" 1058 | version = "0.11.2" 1059 | source = "registry+https://github.com/rust-lang/crates.io-index" 1060 | checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" 1061 | dependencies = [ 1062 | "proc-macro2", 1063 | "quote", 1064 | "syn", 1065 | ] 1066 | --------------------------------------------------------------------------------