├── .gitignore
├── src
    ├── trie_node.rs
    ├── trie.rs
    ├── trie_node
    │   ├── regular_node.rs
    │   └── data_node.rs
    ├── trie
    │   ├── regular_trie.rs
    │   └── data_trie.rs
    └── lib.rs
├── .github
    └── workflows
    │   └── rust.yml
├── Cargo.toml
├── LICENSE
├── README.md
└── tests
    ├── complete_test_regular.rs
    └── complete_test_data.rs


/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /Cargo.lock
3 | .idea/
4 | *.txt


--------------------------------------------------------------------------------
/src/trie_node.rs:
--------------------------------------------------------------------------------
 1 | #[cfg(feature = "data")]
 2 | mod data_node;
 3 | 
 4 | mod regular_node;
 5 | 
 6 | #[cfg(feature = "data")]
 7 | pub(crate) use data_node::TrieDataNode;
 8 | 
 9 | pub(crate) use regular_node::TrieDatalessNode;
10 | 


--------------------------------------------------------------------------------
/.github/workflows/rust.yml:
--------------------------------------------------------------------------------
 1 | name: Test CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ "master" ]
 6 |   pull_request:
 7 |     branches: [ "master" ]
 8 | 
 9 | env:
10 |   CARGO_TERM_COLOR: always
11 | 
12 | jobs:
13 |   test:
14 | 
15 |     runs-on: ubuntu-latest
16 | 
17 |     steps:
18 |     - uses: actions/checkout@v3
19 |     - name: Build
20 |       run: cargo build --release --verbose
21 |     - name: Run tests
22 |       run: cargo test --all --verbose
23 | 


--------------------------------------------------------------------------------
/src/trie.rs:
--------------------------------------------------------------------------------
 1 | #[cfg(feature = "unicode")]
 2 | use unicode_segmentation::UnicodeSegmentation;
 3 | 
 4 | #[cfg(feature = "data")]
 5 | mod data_trie;
 6 | 
 7 | #[cfg(feature = "data")]
 8 | pub use data_trie::DataTrie;
 9 | 
10 | mod regular_trie;
11 | 
12 | pub use regular_trie::Trie;
13 | 
14 | /// Splits `word` into the string slices that are used as trie characters.
15 | ///
16 | /// With the 'unicode' feature enabled the slices are grapheme clusters.
17 | /// Without it, every `char` becomes its own slice, so a grapheme cluster that
18 | /// is made of several `char`s is split into separate pieces, which may give
19 | /// unexpected results for such text.
20 | fn get_characters(word: &str) -> Vec<&str> {
21 |     #[cfg(feature = "unicode")]
22 |     return UnicodeSegmentation::graphemes(word, true).collect();
23 | 
24 |     #[cfg(not(feature = "unicode"))]
25 |     {
26 |         // Slice out every `char` by its byte range. This is what `word.split("")`
27 |         // yields once its leading and trailing empty strings are dropped, but
28 |         // without collecting into a temporary vector and reversing it twice.
29 |         word.char_indices()
30 |             .map(|(start, character)| &word[start..start + character.len_utf8()])
31 |             .collect()
32 |     }
33 | }
34 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "basic_trie"
 3 | version = "2.0.0"
 4 | edition = "2021"
 5 | authors = ["LukasCobbler"]
 6 | license-file = "LICENSE"
 7 | readme = "README.md"
 8 | keywords = ["trie", "collection", "generic"]
 9 | description = "A simple Trie implementation in Rust"
10 | repository = "https://github.com/lukascobbler/basic_trie"
11 | documentation = "https://docs.rs/basic_trie/"
12 | categories = ["data-structures"]
13 | 
14 | [dev-dependencies]
15 | serde_json = "1.0.*"
16 | serde-pickle = "1.1.*"
17 | peak_alloc = "0.2.0"
18 | randomizer = "0.1.2"
19 | growable-bloom-filter = "2.1.0"
20 | 
21 | [dependencies]
22 | unicode-segmentation = { version = "1.11.0", optional = true }
23 | serde_crate = { package = "serde", optional = true, version = "1.0.*", features = ["derive"] }
24 | fxhash = "0.2.1"
25 | thin-vec = "0.2.12"
26 | arrayvec = "0.7.4"
27 | fixedstr = "0.5.5"
28 | 
29 | [features]
30 | default = ["unicode", "data"]
31 | data = []
32 | unicode = ["unicode-segmentation"]
33 | serde = ["serde_crate", "thin-vec/serde", "arrayvec/serde"]
34 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 LukasCobbler
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Basic Trie
  2 | 
  3 | [![Test CI](https://github.com/lukascobbler/basic_trie/actions/workflows/rust.yml/badge.svg)](https://github.com/lukascobbler/basic_trie/actions/workflows/rust.yml)
  4 | 
  5 | The trie data structure is used for quick access to words and
  6 | data that should (could) be associated with them.
  7 | 
  8 | **Basic Trie** is implemented as a tree where each node holds a single character
  9 | that can point at any other character, thus allowing insertion of arbitrary words.
 10 | 
 11 | ##### There are two major implementations:
 12 | - Trie where words are inserted with nothing attached to them
 13 | - Data Trie where each word has a corresponding vector of data attached to it
 14 | 
 15 | Regular tries are often used for word lookups and prefix matching, and data tries are
 16 | often used for finding all data that is connected to some prefix.
 17 | 
 18 | For example, when inserting a whole book in the trie, you could insert every word with
 19 | the corresponding page number it's on. Later when searching for the word, you could get all
 20 | the pages the word is on with no added performance cost.
 21 | 
 22 | ### Global features
 23 | - insertion / removal of words
 24 | - fast contains check
 25 | - finding words based on a prefix
 26 | - longest / shortest words in the trie
 27 | - generic methods: `is_empty`, `len`, `clear`
 28 | - Trie equality with `==`
 29 | - Trie merging with `+` or `+=`
 30 | 
 31 | ### Data Trie features
 32 | - generic type implementation for associating a word to any type, with zero trait constraints
 33 | - finding data of words based on exact match or prefix
 34 | 
 35 | ### Optional features
 36 | - unicode support via the 'unicode' feature with the `unicode-segmentation` crate (enabled by default)
 37 | - data trie support via the 'data' feature (enabled by default)
 38 | - serialization and deserialization via the 'serde' feature with the `serde` crate
 39 | 
 40 | ### Dependencies
 41 | - `unicode-segmentation` (enabled by default)
 42 | - `serde` (only with 'serde' feature flag)
 43 | - `fxhash`
 44 | - `thin-vec`
 45 | - `arrayvec`
 46 | 
 47 | ### License
 48 | The software is licensed under the MIT license.
 49 | 
 50 | ### Examples
 51 | 
 52 | ```rust
 53 |  use basic_trie::Trie;
 54 | 
 55 |  let mut trie = Trie::new();
 56 |  trie.insert("eat");
 57 |  trie.insert("eating");
 58 |  trie.insert("wizard");
 59 | 
 60 |  let mut found_longest_words = trie.get_longest();
 61 |  found_longest_words.sort();
 62 | 
 63 |  assert!(trie.contains("wizard"));
 64 |  assert_eq!(vec![String::from("eating"), String::from("wizard")], found_longest_words);
 65 |  assert_eq!(vec![String::from("eat")], trie.get_shortest());
 66 |  assert_eq!(3, trie.len());
 67 |  ```
 68 | 
 69 |  ```rust
 70 |  use basic_trie::DataTrie;
 71 | 
 72 |  let mut data_trie = DataTrie::<u32>::new();
 73 |  data_trie.insert("apple", 1);
 74 |  data_trie.insert("apple", 2);
 75 |  data_trie.insert_no_data("banana");
 76 |  data_trie.insert("avocado", 15);
 77 | 
 78 | let mut found_data = data_trie.get_data("apple", false).unwrap();
 79 | found_data.sort();
 80 | assert_eq!(vec![&1, &2], found_data);
 81 | 
 82 | let mut found_data = data_trie.get_data("a", true).unwrap();
 83 | found_data.sort();
 84 | assert_eq!(vec![&1, &2, &15], found_data);
 85 | 
 86 | assert_eq!(vec![15], data_trie.remove("avocado").unwrap());
 87 |  ```
 88 | 
 89 | ## Changelog
 90 | - **2.0.0** – Major redesign: increased memory efficiency for the regular Trie (formerly Dataless Trie);
 91 | renamed API methods to better match the standard library; split the two implementations code-wise,
 92 | which fixes the bug where the documentation was not rendered.
 93 | - **1.2.3** – Adding dependencies for even more memory layout optimisations.
 94 | - **1.2.2** – More memory optimisations with Box.
 95 | - **1.2.1** – Memory performance upgrade with Box. Mutable data retrieval.
 96 | - **1.2.0** – Equality and addition operators support between
 97 | same Trie types via `==`, `+` and `+=`.
 98 | - **1.1.1** – Adding `FxHashMap` dependency for boosted performance.
 99 | - **1.1.0** – Serialization with the `serde` crate and the 'serde' feature.
100 | - **1.0.3** – Optimisation of `number_of_words()`. Removing lifetime requirements
101 | for word insertion for much better flexibility at the same logical memory cost.
102 | - **1.0.2** – Bug fixes.
103 | - **1.0.1** – `insert_no_data()` for `DataTrie`. Bugfixes.
104 | - **1.0.0** – Separation of `DataTrie` and `DatalessTrie`. Optimizing
105 | performance for `DatalessTrie`. Incompatible with older versions.
106 | - **<1.0.0** – Simple `Trie` with data and base features.
107 | 
108 | 


--------------------------------------------------------------------------------
/tests/complete_test_regular.rs:
--------------------------------------------------------------------------------
  1 | use basic_trie::Trie;
  2 | use growable_bloom_filter::GrowableBloom;
  3 | use peak_alloc::PeakAlloc;
  4 | use randomizer::Randomizer;
  5 | use std::collections::HashMap;
  6 | use std::collections::HashSet;
  7 | use std::time::Instant;
  8 | use unicode_segmentation::UnicodeSegmentation;
  9 | 
 10 | #[global_allocator]
 11 | static PEAK_ALLOC: PeakAlloc = PeakAlloc;
 12 | /// Randomly generated word list together with the statistics the tests check against.
 13 | pub struct BigData {
 14 |     pub number_of_words: usize,
 15 |     pub first_letter_histogram: HashMap<String, usize>,
 16 |     pub big_data: Vec<String>,
 17 | }
 18 | 
 19 | /// Generates up to `x` random words via `Randomizer::ALPHABETICAL_LOWER(y)`. Candidates that
 20 | /// the Bloom filter reports as already seen are skipped, so fewer than `x` may be returned.
 21 | pub fn generate_random_lines(x: usize, y: usize) -> BigData {
 22 |     let mut seen = GrowableBloom::new(0.01, x);
 23 |     let mut big_data = Vec::new();
 24 |     let mut first_letter_histogram = HashMap::<String, usize>::new();
 25 | 
 26 |     for _ in 0..x {
 27 |         let candidate = Randomizer::ALPHABETICAL_LOWER(y).string().unwrap();
 28 |         if seen.contains(&candidate) {
 29 |             continue;
 30 |         }
 31 |         seen.insert(&candidate);
 32 |         // Assumes the first character is one byte long; otherwise this slice panics.
 33 |         let first_letter = candidate[0..1].to_string();
 34 |         *first_letter_histogram.entry(first_letter).or_insert(0) += 1;
 35 |         big_data.push(candidate);
 36 |     }
 37 | 
 38 |     BigData {
 39 |         number_of_words: big_data.len(),
 40 |         first_letter_histogram,
 41 |         big_data,
 42 |     }
 43 | }
 44 | 
 45 | #[test]
 46 | fn overall_regular() {
 47 |     let number_of_words = 500_000;
 48 |     let word_length = 15;
 49 | 
 50 |     let big_data = generate_random_lines(number_of_words, word_length);
 51 | 
 52 |     println!(
 53 |         "Memory usage after loading words: {:.1}mb\n",
 54 |         PEAK_ALLOC.current_usage_as_mb()
 55 |     );
 56 | 
 57 |     let mut trie = Trie::new();
 58 |     // The loop consumes the word list, so every word is dropped right after its insertion.
 59 |     for word in big_data.big_data {
 60 |         trie.insert(&word);
 61 |     }
 62 | 
 63 |     println!(
 64 |         "Memory usage after trie generation: {:.1}mb",
 65 |         PEAK_ALLOC.current_usage_as_mb()
 66 |     );
 67 | 
 68 |     let now = Instant::now();
 69 |     // Both the longest and the shortest word are expected to be `word_length` graphemes long.
 70 |     assert_eq!(trie.len(), big_data.number_of_words);
 71 |     assert_eq!(word_length, trie.get_longest()[0].graphemes(true).count());
 72 |     assert_eq!(word_length, trie.get_shortest()[0].graphemes(true).count());
 73 |     // The number of words under each first-letter prefix must match the histogram.
 74 |     for (first_letter, count) in big_data.first_letter_histogram.iter() {
 75 |         assert_eq!(*count, trie.len_prefix(first_letter));
 76 |     }
 77 | 
 78 |     let elapsed = now.elapsed();
 79 |     println!("Operations time: {:.2?}", elapsed);
 80 | }
 81 | 
 82 | #[test]
 83 | fn clearing_regular() {
 84 |     let number_of_words = 500_000;
 85 |     let word_length = 15;
 86 | 
 87 |     let big_data = generate_random_lines(number_of_words, word_length);
 88 | 
 89 |     println!(
 90 |         "Memory usage after loading words: {:.1}mb\n",
 91 |         PEAK_ALLOC.current_usage_as_mb()
 92 |     );
 93 | 
 94 |     let mut trie = Trie::new();
 95 |     // The loop consumes the word list, so every word is dropped right after its insertion.
 96 |     for word in big_data.big_data {
 97 |         trie.insert(&word);
 98 |     }
 99 | 
100 |     println!(
101 |         "Memory usage after trie generation: {:.1}mb\n",
102 |         PEAK_ALLOC.current_usage_as_mb()
103 |     );
104 | 
105 |     let now = Instant::now();
106 |     // Only `clear` and the emptiness check are timed.
107 |     trie.clear();
108 |     assert!(trie.is_empty());
109 | 
110 |     let elapsed = now.elapsed();
111 |     println!("Operations time: {:.2?}\n", elapsed);
112 | 
113 |     println!(
114 |         "Memory usage after trie cleanup: {:.1}mb\n",
115 |         PEAK_ALLOC.current_usage_as_mb()
116 |     );
117 | }
118 | 
119 | #[test]
120 | fn add_op_regular_1() {
121 |     let number_of_words = 100_000;
122 |     let word_length = 15;
123 | 
124 |     let big_data = generate_random_lines(number_of_words, word_length);
125 | 
126 |     let mut trie_0 = Trie::new();
127 |     let mut trie_1 = Trie::new();
128 |     let mut trie_2 = Trie::new();
129 | 
130 |     println!(
131 |         "Memory usage after loading words: {:.1}mb\n",
132 |         PEAK_ALLOC.current_usage_as_mb()
133 |     );
134 |     // `trie_0` is the reference: it receives every word.
135 |     for line in big_data.big_data.iter() {
136 |         trie_0.insert(line);
137 |     }
138 |     // `trie_1` gets all words except the last `number_of_words / 2` ones, in reverse order.
139 |     for line in big_data
140 |         .big_data
141 |         .iter()
142 |         .rev()
143 |         .skip(big_data.number_of_words / 2)
144 |     {
145 |         trie_1.insert(line);
146 |     }
147 |     // `trie_2` gets all words except the first `number_of_words / 2` ones.
148 |     for line in big_data.big_data.iter().skip(big_data.number_of_words / 2) {
149 |         trie_2.insert(line);
150 |     }
151 | 
152 |     println!(
153 |         "Memory usage after trie generation: {:.1}mb",
154 |         PEAK_ALLOC.current_usage_as_mb()
155 |     );
156 | 
157 |     let now = Instant::now();
158 | 
159 |     trie_1 += trie_2;
160 | 
161 |     let elapsed = now.elapsed();
162 |     println!("Operations time: {:.2?}", elapsed);
163 | 
164 |     let t1_words = trie_1.get_all();
165 |     let correct_words = trie_0.get_all();
166 |     // Collect the words of the reference trie that are missing from the merged trie.
167 |     let item_set: HashSet<_> = t1_words.iter().collect();
168 |     let only_in_correct: Vec<_> = correct_words
169 |         .into_iter()
170 |         .filter(|item| !item_set.contains(item))
171 |         .collect();
172 | 
173 |     assert_eq!(only_in_correct, Vec::<String>::new());
174 |     assert_eq!(only_in_correct.len(), 0);
175 |     assert!(trie_0 == trie_1);
176 | 
177 |     println!(
178 |         "Memory usage after trie addition: {:.1}mb\n",
179 |         PEAK_ALLOC.current_usage_as_mb()
180 |     );
181 | }
182 | 
183 | #[test]
184 | fn add_op_regular_2() {
185 |     let number_of_words = 100_000;
186 |     let word_length = 15;
187 | 
188 |     let big_data = generate_random_lines(number_of_words, word_length);
189 | 
190 |     let mut trie_0 = Trie::new();
191 |     let mut trie_1 = Trie::new();
192 |     let mut trie_2 = Trie::new();
193 | 
194 |     println!(
195 |         "Memory usage after loading words: {:.1}mb\n",
196 |         PEAK_ALLOC.current_usage_as_mb()
197 |     );
198 |     // `trie_0` is the reference: it receives every word.
199 |     for line in big_data.big_data.iter() {
200 |         trie_0.insert(&line);
201 |     }
202 |     // `trie_1` gets all words except the last 20000, in reverse order.
203 |     for line in big_data.big_data.iter().rev().skip(20000) {
204 |         trie_1.insert(&line);
205 |     }
206 |     // `trie_2` gets all words except the first 20000.
207 |     for line in big_data.big_data.iter().skip(20000) {
208 |         trie_2.insert(&line);
209 |     }
210 | 
211 |     println!(
212 |         "Memory usage after trie generation: {:.1}mb",
213 |         PEAK_ALLOC.current_usage_as_mb()
214 |     );
215 | 
216 |     let now = Instant::now();
217 |     // `+` takes both tries by value and yields the merged trie.
218 |     let trie_3 = trie_1 + trie_2;
219 |     let elapsed = now.elapsed();
220 |     println!("Operations time: {:.2?}", elapsed);
221 | 
222 |     assert!(trie_0 == trie_3);
223 | 
224 |     let t3_words = trie_3.get_all();
225 |     let correct_words = trie_0.get_all();
226 |     // Collect the words of the reference trie that are missing from the merged trie.
227 |     let item_set: HashSet<_> = t3_words.iter().collect();
228 |     let only_in_correct: Vec<_> = correct_words
229 |         .into_iter()
230 |         .filter(|item| !item_set.contains(item))
231 |         .collect();
232 | 
233 |     assert_eq!(only_in_correct.len(), 0);
234 | 
235 |     println!(
236 |         "Memory usage after trie addition: {:.1}mb\n",
237 |         PEAK_ALLOC.current_usage_as_mb()
238 |     );
239 | }
240 | 
241 | #[test]
242 | fn equals_regular() {
243 |     // Two tries built from the same words must compare equal with `==`.
244 |     let number_of_words = 100_000;
245 |     let word_length = 15;
246 | 
247 |     let big_data = generate_random_lines(number_of_words, word_length);
248 | 
249 |     let mut trie_1 = Trie::new();
250 |     let mut trie_2 = Trie::new();
251 | 
252 |     println!(
253 |         "Memory usage after loading words: {:.1}mb\n",
254 |         PEAK_ALLOC.current_usage_as_mb()
255 |     );
256 | 
257 |     // Fill both tries with exactly the same words.
258 |     for line in big_data.big_data.iter() {
259 |         trie_1.insert(line);
260 |         trie_2.insert(line);
261 |     }
262 | 
263 |     println!(
264 |         "Memory usage after trie generation: {:.1}mb",
265 |         PEAK_ALLOC.current_usage_as_mb()
266 |     );
267 | 
268 |     // Only the comparison itself is timed.
269 |     let now = Instant::now();
270 |     let result = trie_1 == trie_2;
271 |     let elapsed = now.elapsed();
272 | 
273 |     println!("Operations time: {:.2?}", elapsed);
274 | 
275 |     assert!(result);
276 | 
277 |     // Reported after the comparison; this test performs no trie addition.
278 |     println!(
279 |         "Memory usage after trie comparison: {:.1}mb\n",
280 |         PEAK_ALLOC.current_usage_as_mb()
281 |     );
282 | }
283 | 


--------------------------------------------------------------------------------
/src/trie_node/regular_node.rs:
--------------------------------------------------------------------------------
  1 | use fxhash::FxHashMap;
  2 | use std::cmp::Ordering;
  3 | use std::ops;
  4 | 
  5 | #[cfg(feature = "serde")]
  6 | use serde_crate::{Deserialize, Serialize};
  7 | 
  8 | /// A single trie node: its child nodes keyed by character, plus a flag marking the end of a word.
  9 | #[derive(Debug, Default, Clone)]
 10 | #[cfg_attr(
 11 |     feature = "serde",
 12 |     derive(Serialize, Deserialize),
 13 |     serde(crate = "serde_crate")
 14 | )]
 15 | pub struct TrieDatalessNode {
 16 |     #[cfg_attr(feature = "serde", serde(rename = "c"))]
 17 |     pub(crate) children: Box<FxHashMap<arrayvec::ArrayString<4>, TrieDatalessNode>>,
 18 |     #[cfg_attr(feature = "serde", serde(rename = "we"))]
 19 |     word_end: bool,
 20 | }
 21 | 
 22 | impl TrieDatalessNode {
 23 |     /// Creates an empty node: it has no children and does not mark
 24 |     /// the end of a word.
 25 |     pub(crate) fn new() -> Self {
 26 |         // The derived `Default` produces exactly this state: an empty
 27 |         // children map and `word_end == false`.
 28 |         Self::default()
 29 |     }
 30 | 
 31 |     /// Collects every word stored at or below this node into `found_words`.
 32 |     /// `substring` is the text spelled by the path that leads to this node.
 33 |     pub(crate) fn find_words(&self, substring: &str, found_words: &mut Vec<String>) {
 34 |         if self.word_end {
 35 |             found_words.push(substring.to_owned());
 36 |         }
 37 |         for (character, child) in self.children.iter() {
 38 |             let extended = substring.to_owned() + character;
 39 |             child.find_words(&extended, found_words);
 40 |         }
 41 |     }
 42 | 
 43 |     /// Depth-first search that gathers the shortest (`ord == Ordering::Less`) or
 44 |     /// longest (`ord == Ordering::Greater`) words at or below this node. Lengths are
 45 |     /// compared with `str::len`, i.e. in bytes, and all equally good words are kept.
 46 |     pub(crate) fn words_min_max(
 47 |         &self,
 48 |         substring: &str,
 49 |         found_words: &mut Vec<String>,
 50 |         ord: Ordering,
 51 |     ) {
 52 |         if self.word_end {
 53 |             // How this word compares to the best one so far, if there is one.
 54 |             let comparison = found_words
 55 |                 .first()
 56 |                 .map(|best| substring.len().cmp(&best.len()));
 57 |             match comparison {
 58 |                 // First candidate, or a tie with the current best: record it.
 59 |                 None | Some(Ordering::Equal) => found_words.push(substring.to_owned()),
 60 |                 // Strictly better in the requested direction: start over with it.
 61 |                 Some(found) if found == ord => {
 62 |                     found_words.clear();
 63 |                     found_words.push(substring.to_owned());
 64 |                 }
 65 |                 // Worse than the current best: skip this word.
 66 |                 Some(_) => (),
 67 |             }
 68 |         }
 69 | 
 70 |         for (character, child) in self.children.iter() {
 71 |             child.words_min_max(&(substring.to_owned() + character), found_words, ord);
 72 |         }
 73 |     }
 74 | 
 75 |     /// Drops the whole subtree below this node and returns how many words
 76 |     /// were stored in it, including the word that ends at this node, if any.
 77 |     /// This node's own word-end marker is counted but left untouched.
 78 |     pub(crate) fn remove_all_words(&mut self) -> usize {
 79 |         // Start with this node's own word (0 or 1).
 80 |         let mut num_removed = usize::from(self.word_end);
 81 | 
 82 |         // Recurse first so that the words of every descendant are counted
 83 |         // before the children map is replaced.
 84 |         for child in self.children.values_mut() {
 85 |             num_removed += child.remove_all_words();
 86 |         }
 87 |         self.children = Default::default();
 88 |         num_removed
 89 |     }
 90 | 
 91 |     /// Counts the words stored at or below this node; a word that ends at
 92 |     /// this node itself is included.
 93 |     pub(crate) fn count_words(&self) -> usize {
 94 |         let own_word = usize::from(self.word_end);
 95 |         let words_below: usize = self.children.values().map(Self::count_words).sum();
 96 | 
 97 |         own_word + words_below
 98 |     }
 99 | 
100 |     /// Removes one word, given as an iterator over its characters, and frees
101 |     /// the nodes that are no longer needed.
102 |     ///
103 |     /// The recursion walks down to the word's final node, unmarks it and then
104 |     /// unwinds: a node drops its children unless one of them is still in use.
105 |     /// Returns `true` if the parent has to keep this node, which is the case
106 |     /// when it still has children or still marks the end of another word.
107 |     ///
108 |     /// # Panics
109 |     /// Panics if a character of the word has no matching child node.
110 |     pub(crate) fn remove_one_word<'b>(
111 |         &mut self,
112 |         mut characters: impl Iterator<Item = &'b str>,
113 |     ) -> bool {
114 |         let Some(next_character) = characters.next() else {
115 |             // Final node of the word. It has to stay when longer words pass
116 |             // through it, otherwise the parent would drop those words too.
117 |             self.disassociate();
118 |             return !self.children.is_empty();
119 |         };
120 | 
121 |         let next_node = self.children.get_mut(next_character).unwrap();
122 |         let must_keep = next_node.remove_one_word(characters);
123 | 
124 |         if must_keep || self.children.len() > 1 {
125 |             return true;
126 |         }
127 |         self.clear_children();
128 | 
129 |         self.is_associated()
130 |     }
131 | 
132 |     /// Marks this node as the end of a word.
133 |     pub(crate) fn associate(&mut self) {
134 |         self.word_end = true;
135 |     }
136 | 
137 |     /// Clears the end-of-word marker of this node.
138 |     pub(crate) fn disassociate(&mut self) {
139 |         self.word_end = false;
140 |     }
141 |     /// Returns `true` if this node is marked as the end of a word.
142 |     pub(crate) fn is_associated(&self) -> bool {
143 |         self.word_end
144 |     }
145 | 
146 |     /// Drops all children by replacing the children map with a new, empty one.
147 |     pub(crate) fn clear_children(&mut self) {
148 |         self.children = Default::default();
149 |     }
150 | }
151 | 
152 | impl ops::AddAssign for TrieDatalessNode {
153 |     /// Merges `rhs` into `self`; this is the `+=` operator on nodes.
154 |     ///
155 |     /// For every `(character, child)` pair in the children of `rhs`:
156 |     /// - if `self` has no child for that character, the whole subtree of `rhs`
157 |     ///   is moved into `self`'s children map;
158 |     /// - otherwise the two children are merged recursively, and `self`'s child
159 |     ///   becomes a word end if the child of `rhs` is one.
160 |     ///
161 |     /// Only children are merged: the word-end marker of `rhs` itself is not
162 |     /// copied onto `self` by this call.
163 |     fn add_assign(&mut self, rhs: Self) {
164 |         // Scoped import: the entry API needs one lookup per character.
165 |         use std::collections::hash_map::Entry;
166 | 
167 |         for (character, rhs_child) in rhs.children.into_iter() {
168 |             match self.children.entry(character) {
169 |                 // Both nodes know this character: merge in place instead of
170 |                 // removing the child, merging it and inserting it again.
171 |                 Entry::Occupied(mut occupied) => {
172 |                     let self_child = occupied.get_mut();
173 |                     // Example: adding 'word' to 'word1' has to mark the 'd'
174 |                     // node of `self` as a word end.
175 |                     self_child.word_end |= rhs_child.word_end;
176 |                     *self_child += rhs_child;
177 |                 }
178 |                 // Only `rhs` knows this character, so no conflict arises and
179 |                 // its subtree is moved over as it is.
180 |                 Entry::Vacant(vacant) => {
181 |                     vacant.insert(rhs_child);
182 |                 }
183 |             }
184 |         }
185 |     }
186 | }
187 | 
188 | impl PartialEq for TrieDatalessNode {
189 |     /// Two nodes are equal when they agree on the word-end marker, have the
190 |     /// same set of child characters, and the children behind every character
191 |     /// are equal as well.
192 |     fn eq(&self, other: &Self) -> bool {
193 |         // Cheap checks first: the marker and the number of children.
194 |         if self.word_end != other.word_end || self.children.len() != other.children.len() {
195 |             return false;
196 |         }
197 | 
198 |         // Both maps have the same size, so the key sets match exactly when
199 |         // every key of `self` is found in `other`; a missing key or an
200 |         // unequal child makes the nodes differ.
201 |         self.children.iter().all(|(character, self_child)| {
202 |             match other.children.get(character) {
203 |                 Some(other_child) => other_child == self_child,
204 |                 None => false,
205 |             }
206 |         })
207 |     }
208 | }
209 | 


--------------------------------------------------------------------------------
/tests/complete_test_data.rs:
--------------------------------------------------------------------------------
  1 | use basic_trie::DataTrie;
  2 | use growable_bloom_filter::GrowableBloom;
  3 | use peak_alloc::PeakAlloc;
  4 | use randomizer::Randomizer;
  5 | use std::collections::HashMap;
  6 | use std::collections::HashSet;
  7 | use std::time::Instant;
  8 | use unicode_segmentation::UnicodeSegmentation;
  9 | 
 10 | #[global_allocator]
 11 | static PEAK_ALLOC: PeakAlloc = PeakAlloc;
 12 | /// Randomly generated word list together with the statistics the tests check against.
 13 | pub struct BigData {
 14 |     pub number_of_words: usize,
 15 |     pub first_letter_histogram: HashMap<String, usize>,
 16 |     pub big_data: Vec<String>,
 17 | }
 18 | 
 19 | /// Generates exactly `x` random words via `Randomizer::ALPHABETICAL_LOWER(y)`. Candidates
 20 | /// that the Bloom filter reports as already seen are discarded and drawn again.
 21 | pub fn generate_random_lines(x: usize, y: usize) -> BigData {
 22 |     let mut seen = GrowableBloom::new(0.01, x);
 23 |     let mut big_data = Vec::new();
 24 |     let mut first_letter_histogram = HashMap::<String, usize>::new();
 25 | 
 26 |     // Keep drawing until `x` words have been accepted.
 27 |     while big_data.len() != x {
 28 |         let candidate = Randomizer::ALPHABETICAL_LOWER(y).string().unwrap();
 29 |         if seen.contains(&candidate) {
 30 |             continue;
 31 |         }
 32 |         seen.insert(&candidate);
 33 |         let first_letter = candidate[0..1].to_string();
 34 |         *first_letter_histogram.entry(first_letter).or_insert(0) += 1;
 35 |         big_data.push(candidate);
 36 |     }
 37 | 
 38 |     BigData {
 39 |         number_of_words: big_data.len(),
 40 |         first_letter_histogram,
 41 |         big_data,
 42 |     }
 43 | }
 44 | 
 45 | #[test]
 46 | fn overall_data() {
 47 |     let number_of_words = 500_000;
 48 |     let word_length = 15;
 49 | 
 50 |     let big_data = generate_random_lines(number_of_words, word_length);
 51 | 
 52 |     println!(
 53 |         "Memory usage after loading words: {:.1}mb\n",
 54 |         PEAK_ALLOC.current_usage_as_mb()
 55 |     );
 56 | 
 57 |     let mut data_trie = DataTrie::new();
 58 |     // Every word gets the same payload, 1000, so removed data can be checked by count alone.
 59 |     for word in big_data.big_data {
 60 |         data_trie.insert(&word, 1000);
 61 |     }
 62 | 
 63 |     println!(
 64 |         "Memory usage after data trie generation: {:.1}mb",
 65 |         PEAK_ALLOC.current_usage_as_mb()
 66 |     );
 67 | 
 68 |     let now = Instant::now();
 69 | 
 70 |     assert_eq!(data_trie.len(), big_data.number_of_words);
 71 |     assert_eq!(
 72 |         word_length,
 73 |         data_trie.get_longest()[0].graphemes(true).count()
 74 |     );
 75 |     assert_eq!(
 76 |         word_length,
 77 |         data_trie.get_shortest()[0].graphemes(true).count()
 78 |     );
 79 | 
 80 |     for (first_letter, count) in big_data.first_letter_histogram.iter() {
 81 |         assert_eq!(*count, data_trie.len_prefix(first_letter));
 82 |     }
 83 |     // Removing a prefix must hand back one payload per word that started with that letter.
 84 |     for (first_letter, count) in big_data.first_letter_histogram.iter() {
 85 |         assert_eq!(
 86 |             vec![1000; *count],
 87 |             data_trie.remove_prefix(first_letter).unwrap()
 88 |         );
 89 |     }
 90 |     // Every word starts with one of the histogram's letters, so nothing may be left over.
 91 |     assert!(data_trie.is_empty());
 92 | 
 93 |     let elapsed = now.elapsed();
 94 |     println!("Operations time: {:.2?}", elapsed);
 95 | }
 96 | 
 97 | #[test]
 98 | fn clearing_data() {
 99 |     let number_of_words = 500_000;
100 |     let word_length = 15;
101 | 
102 |     let big_data = generate_random_lines(number_of_words, word_length);
103 | 
104 |     println!(
105 |         "Memory usage after loading words: {:.1}mb\n",
106 |         PEAK_ALLOC.current_usage_as_mb()
107 |     );
108 | 
109 |     let mut data_trie = DataTrie::new();
110 |     // The loop consumes the word list, so every word is dropped right after its insertion.
111 |     for word in big_data.big_data {
112 |         data_trie.insert(&word, 0);
113 |     }
114 | 
115 |     println!(
116 |         "Memory usage after data trie generation: {:.1}mb\n",
117 |         PEAK_ALLOC.current_usage_as_mb()
118 |     );
119 | 
120 |     let now = Instant::now();
121 |     // Only `clear` and the emptiness check are timed.
122 |     data_trie.clear();
123 |     assert!(data_trie.is_empty());
124 | 
125 |     let elapsed = now.elapsed();
126 |     println!("Operations time: {:.2?}\n", elapsed);
127 | 
128 |     println!(
129 |         "Memory usage after data trie cleanup: {:.1}mb\n",
130 |         PEAK_ALLOC.current_usage_as_mb()
131 |     );
132 | }
133 | 
/// Verifies the `+=` operator on data tries: two tries that together cover
/// the whole word list must, once merged, equal a trie built from every word.
#[test]
fn add_op_data_1() {
    let (word_count, letters_per_word) = (100_000, 15);
    let generated = generate_random_lines(word_count, letters_per_word);

    let mut reference = DataTrie::new();
    let mut merged = DataTrie::new();
    let mut addend = DataTrie::new();

    println!(
        "Memory usage after loading words: {:.1}mb\n",
        PEAK_ALLOC.current_usage_as_mb()
    );

    // The reference trie receives every generated word.
    generated.big_data.iter().for_each(|word| {
        reference.insert(word, word.as_str());
    });

    // Walking backwards and skipping half leaves the first half of the list.
    generated
        .big_data
        .iter()
        .rev()
        .skip(generated.number_of_words / 2)
        .for_each(|word| {
            merged.insert(word, word.as_str());
        });

    // Skipping half from the front leaves the second half of the list.
    generated
        .big_data
        .iter()
        .skip(generated.number_of_words / 2)
        .for_each(|word| {
            addend.insert(word, word.as_str());
        });

    println!(
        "Memory usage after data trie generation: {:.1}mb",
        PEAK_ALLOC.current_usage_as_mb()
    );

    let timer = Instant::now();
    merged += addend;
    let spent = timer.elapsed();
    println!("Operations time: {:.2?}", spent);

    let merged_words = merged.get_all();
    let expected_words = reference.get_all();

    // Every word of the reference trie has to show up in the merged trie.
    let seen: HashSet<_> = merged_words.iter().collect();
    let missing = expected_words
        .into_iter()
        .filter(|word| !seen.contains(word))
        .count();

    assert_eq!(missing, 0);
    println!("{}", generated.number_of_words);
    assert!(reference == merged);

    println!(
        "Memory usage after data trie addition: {:.1}mb\n",
        PEAK_ALLOC.current_usage_as_mb()
    );
}
197 | 
/// Verifies the `+` operator on data tries: adding a trie holding the first
/// half of the words to one holding the second half must produce a trie equal
/// to one built from the complete word list.
#[test]
fn add_op_data_2() {
    let number_of_words = 100_000;
    let word_length = 15;

    let big_data = generate_random_lines(number_of_words, word_length);
    // Both partial tries split the word list at the same point.
    let half = number_of_words / 2;

    let mut data_trie_0 = DataTrie::new();
    let mut data_trie_1 = DataTrie::new();
    let mut data_trie_2 = DataTrie::new();

    println!(
        "Memory usage after loading words: {:.1}mb\n",
        PEAK_ALLOC.current_usage_as_mb()
    );

    // `line` is already a reference, so it is passed on without re-borrowing.
    for line in big_data.big_data.iter() {
        data_trie_0.insert(line, line.as_str());
    }

    // Reversed iteration minus the last half == the first half of the words.
    for line in big_data.big_data.iter().rev().skip(half) {
        data_trie_1.insert(line, line.as_str());
    }

    // The second half of the words.
    for line in big_data.big_data.iter().skip(half) {
        data_trie_2.insert(line, line.as_str());
    }

    println!(
        "Memory usage after data trie generation: {:.1}mb",
        PEAK_ALLOC.current_usage_as_mb()
    );

    let now = Instant::now();

    let data_trie_3 = data_trie_1 + data_trie_2;
    let elapsed = now.elapsed();
    println!("Operations time: {:.2?}", elapsed);

    let t3_words = data_trie_3.get_all();
    let correct_words = data_trie_0.get_all();

    // Collect the words of the reference trie that the sum is missing.
    let item_set: HashSet<_> = t3_words.iter().collect();
    let only_in_correct: Vec<_> = correct_words
        .into_iter()
        .filter(|item| !item_set.contains(item))
        .collect();

    assert_eq!(only_in_correct.len(), 0);
    assert!(data_trie_0 == data_trie_3);

    println!(
        "Memory usage after data trie addition: {:.1}mb\n",
        PEAK_ALLOC.current_usage_as_mb()
    );
}
254 | 
/// Verifies the `==` operator on data tries: two tries built independently
/// from the same word list must compare equal.
#[test]
fn equals_data() {
    let number_of_words = 100_000;
    let word_length = 15;

    let big_data = generate_random_lines(number_of_words, word_length);

    let mut data_trie_1 = DataTrie::new();
    let mut data_trie_2 = DataTrie::new();

    println!(
        "Memory usage after loading words: {:.1}mb\n",
        PEAK_ALLOC.current_usage_as_mb()
    );

    // `line` is already a reference, so it is passed on without re-borrowing.
    for line in big_data.big_data.iter() {
        data_trie_1.insert(line, line.as_str());
    }

    for line in big_data.big_data.iter() {
        data_trie_2.insert(line, line.as_str());
    }

    println!(
        "Memory usage after data trie generation: {:.1}mb",
        PEAK_ALLOC.current_usage_as_mb()
    );

    // Only the comparison itself is timed.
    let now = Instant::now();

    let result = data_trie_1 == data_trie_2;

    let elapsed = now.elapsed();
    println!("Operations time: {:.2?}", elapsed);

    assert!(result);

    println!(
        "Memory usage after data trie comparison: {:.1}mb\n",
        PEAK_ALLOC.current_usage_as_mb()
    );
}
297 | 


--------------------------------------------------------------------------------
/src/trie_node/data_node.rs:
--------------------------------------------------------------------------------
  1 | use fxhash::FxHashMap;
  2 | use std::cmp::Ordering;
  3 | use std::ops;
  4 | use thin_vec::ThinVec;
  5 | 
  6 | #[cfg(feature = "serde")]
  7 | use serde_crate::{Deserialize, Serialize};
  8 | 
/// Word-end marker of a node: `None` when no word ends at the node,
/// `Some(data)` when a word ends there, with `data` holding the values
/// attached to that word.
type WordEnd<D> = Option<ThinVec<D>>;
 10 | 
/// Helper struct for returning multiple values for deleting data.
/// It is needed because the 'must_keep' value will at some point change
/// from false to true, but the data stays the same from the beginning of
/// unwinding.
pub(crate) struct RemoveData<D> {
    /// Tells the parent node whether the node that produced this value
    /// has to stay in the trie.
    must_keep: bool,
    /// Association taken from the last node of the removed word,
    /// passed unchanged through every level of the unwinding.
    pub(crate) data: WordEnd<D>,
}
 19 | 
/// Singular trie node that represents its children and a marker for word ending.
///
/// The type parameter `D` is the type of the data attached to word ends.
#[derive(Debug, Default, Clone)]
#[cfg_attr(
    feature = "serde",
    derive(Serialize, Deserialize),
    serde(crate = "serde_crate")
)]
pub struct TrieDataNode<D> {
    /// Child nodes, keyed by the character leading to them. Keys are
    /// fixed-capacity strings of at most 4 bytes. Serialized as `c`.
    #[cfg_attr(feature = "serde", serde(rename = "c"))]
    pub(crate) children: Box<FxHashMap<arrayvec::ArrayString<4>, TrieDataNode<D>>>,
    /// `Some` when a word ends at this node; the vector holds the data
    /// attached to that word. Serialized as `wed`.
    #[cfg_attr(feature = "serde", serde(rename = "wed"))]
    word_end_data: WordEnd<D>,
}
 33 | 
 34 | /// Methods only on nodes that have data.
 35 | impl<D> TrieDataNode<D> {
 36 |     /// Returns a new instance of a TrieNode.
 37 |     pub(crate) fn new() -> Self {
 38 |         TrieDataNode {
 39 |             children: Default::default(),
 40 |             word_end_data: None,
 41 |         }
 42 |     }
 43 | 
 44 |     /// Recursive function that drops all children maps and collects data
 45 |     /// regardless of having multiple words branching from them or not.
 46 |     pub(crate) fn remove_all_words_collect(&mut self, found_data: &mut Vec<D>) -> usize {
 47 |         let num_removed = self
 48 |             .children
 49 |             .values_mut()
 50 |             .map(|child| child.remove_all_words_collect(found_data))
 51 |             .sum::<usize>()
 52 |             + self.is_associated() as usize;
 53 | 
 54 |         if let Some(data_vec) = self.disassociate() {
 55 |             found_data.extend(data_vec);
 56 |         }
 57 | 
 58 |         self.clear_children();
 59 | 
 60 |         num_removed
 61 |     }
 62 | 
 63 |     /// Recursive function that counts the number of words from a starting node.
 64 |     pub(crate) fn count_words(&self) -> usize {
 65 |         self.children
 66 |             .values()
 67 |             .map(|child| child.count_words())
 68 |             .sum::<usize>()
 69 |             + self.is_associated() as usize
 70 |     }
 71 | 
 72 |     /// Recursive function finds every node that is an end of a word and appends
 73 |     /// its data as references to the passed vector.
 74 |     pub(crate) fn generate_all_data<'a>(&'a self, found_data: &mut Vec<&'a D>) {
 75 |         if let Some(data_vec) = &self.word_end_data {
 76 |             found_data.extend(data_vec.iter());
 77 |         }
 78 | 
 79 |         self.children
 80 |             .values()
 81 |             .for_each(|x| x.generate_all_data(found_data));
 82 |     }
 83 | 
 84 |     /// Recursive function finds every node that is an end of a word and appends
 85 |     /// its data as mutable references to the passed vector.
 86 |     pub(crate) fn generate_all_data_mut<'a>(&'a mut self, found_data: &mut Vec<&'a mut D>) {
 87 |         if let Some(data_vec) = &mut self.word_end_data {
 88 |             found_data.extend(data_vec.iter_mut());
 89 |         }
 90 | 
 91 |         self.children
 92 |             .values_mut()
 93 |             .for_each(|x| x.generate_all_data_mut(found_data));
 94 |     }
 95 | 
 96 |     /// Function pushes data to the association vector.
 97 |     pub(crate) fn push_data(&mut self, data: D) {
 98 |         self.get_association_mut().as_mut().unwrap().push(data);
 99 |     }
100 | 
101 |     /// Recursive function for inserting found words from the given node and
102 |     /// given starting substring.
103 |     pub(crate) fn find_words(&self, substring: &str, found_words: &mut Vec<String>) {
104 |         if self.is_associated() {
105 |             found_words.push(substring.to_string());
106 |         }
107 | 
108 |         self.children.iter().for_each(|(character, node)| {
109 |             node.find_words(&(substring.to_owned() + character), found_words)
110 |         });
111 |     }
112 | 
113 |     /// The recursive function for finding a vector of shortest and longest words in the TrieNode consists of:
114 |     /// - the DFS tree traversal part for getting to every child node;
115 |     /// - matching lengths of found words in combination with the passed ordering.
116 |     pub(crate) fn words_min_max(
117 |         &self,
118 |         substring: &str,
119 |         found_words: &mut Vec<String>,
120 |         ord: Ordering,
121 |     ) {
122 |         'word: {
123 |             if self.is_associated() {
124 |                 if let Some(found) = found_words.first() {
125 |                     match substring.len().cmp(&found.len()) {
126 |                         Ordering::Less if ord == Ordering::Less => {
127 |                             found_words.clear();
128 |                         }
129 |                         Ordering::Greater if ord == Ordering::Greater => {
130 |                             found_words.clear();
131 |                         }
132 |                         Ordering::Equal => (),
133 |                         _ => break 'word,
134 |                     }
135 |                 }
136 |                 found_words.push(substring.to_string());
137 |             }
138 |         }
139 | 
140 |         self.children.iter().for_each(|(character, node)| {
141 |             node.words_min_max(&(substring.to_owned() + character), found_words, ord)
142 |         });
143 |     }
144 | 
145 |     /// Function resets the association of a word and returns the
146 |     /// previous association. If 'keep_word' is true, the association is only
147 |     /// reset.
148 |     pub(crate) fn clear_word_end_association(&mut self, keep_word: bool) -> WordEnd<D> {
149 |         let return_data = self.disassociate();
150 | 
151 |         if keep_word && return_data.is_some() {
152 |             self.associate();
153 |         }
154 | 
155 |         return_data
156 |     }
157 | 
158 |     /// Recursive function for removing and freeing memory of a word that is not needed anymore.
159 |     /// The algorithm first finds the last node of a word given in the form of a character iterator,
160 |     /// then it frees the maps and unwinds to the first node that should not be deleted.
161 |     /// The first node that should not be deleted is either:
162 |     /// - the root node
163 |     /// - the node that has multiple words branching from it
164 |     /// - the node that represents an end to some word with the same prefix
165 |     /// The last node's data is propagated all the way to the final return
166 |     /// with the help of auxiliary 'RemoveData<D>' struct.
167 |     pub(crate) fn remove_one_word<'b>(
168 |         &mut self,
169 |         mut characters: impl Iterator<Item = &'b str>,
170 |     ) -> RemoveData<D> {
171 |         let next_character = match characters.next() {
172 |             None => {
173 |                 return RemoveData {
174 |                     must_keep: false,
175 |                     data: self.disassociate(),
176 |                 }
177 |             }
178 |             Some(char) => char,
179 |         };
180 | 
181 |         let next_node = self.children.get_mut(next_character).unwrap();
182 |         let must_keep = next_node.remove_one_word(characters);
183 | 
184 |         if self.children.len() > 1 || must_keep.must_keep {
185 |             return RemoveData {
186 |                 must_keep: true,
187 |                 data: must_keep.data,
188 |             };
189 |         }
190 |         self.clear_children();
191 | 
192 |         RemoveData {
193 |             must_keep: self.is_associated(),
194 |             data: must_keep.data,
195 |         }
196 |     }
197 | 
198 |     /// Function marks the node as an end of a word.
199 |     pub(crate) fn associate(&mut self) {
200 |         self.word_end_data = Some(ThinVec::new());
201 |     }
202 | 
203 |     /// Function unmarks the node as an end of a word and returns the data.
204 |     pub(crate) fn disassociate(&mut self) -> WordEnd<D> {
205 |         self.word_end_data.take()
206 |     }
207 | 
208 |     /// Function returns true if an association is found for the word.
209 |     pub(crate) fn is_associated(&self) -> bool {
210 |         self.word_end_data.is_some()
211 |     }
212 | 
213 |     /// Function returns the node association.
214 |     pub(crate) fn get_association(&self) -> &WordEnd<D> {
215 |         &self.word_end_data
216 |     }
217 | 
218 |     /// Function returns the mutable node association.
219 |     pub(crate) fn get_association_mut(&mut self) -> &mut WordEnd<D> {
220 |         &mut self.word_end_data
221 |     }
222 | 
223 |     /// Function removes all children of a node.
224 |     pub(crate) fn clear_children(&mut self) {
225 |         self.children = Default::default();
226 |     }
227 | }
228 | 
229 | impl<D> ops::AddAssign for TrieDataNode<D> {
230 |     /// Overriding the += operator on nodes.
231 |     /// Function adds two nodes based on the principle:
232 |     /// for every child node and character in the 'rhs' node:
233 |     /// - if the self node doesn't have that character in its children map,
234 |     /// simply move the pointer to the self's children map without any extra cost;
235 |     /// - if the self node has that character, the node of that character (self's child)
236 |     /// is added with the 'rhc's' node.
237 |     /// An edge case exists when the 'rhc's' node has an association but self's node doesn't.
238 |     /// That association is handled based on the result of 'rhc_next_node.word_end_data'.
239 |     /// On Some(data), the self node vector is initialized with the 'rhc' node vector.
240 |     fn add_assign(&mut self, rhs: Self) {
241 |         for (char, mut rhs_next_node) in rhs.children.into_iter() {
242 |             // Does self contain the character?
243 |             match self.children.remove(&*char) {
244 |                 // The whole node is removed, as owned, operated on and returned in self's children.
245 |                 Some(mut self_next_node) => {
246 |                     // Edge case: associate self node if the other node is also associated
247 |                     // Example: when adding 'word' to 'word1', 'd' on 'word' needs to be associated
248 |                     if let Some(data_vec_rhs) = rhs_next_node.word_end_data.take() {
249 |                         if let Some(data_vec_self) = &mut self_next_node.word_end_data {
250 |                             data_vec_self.extend(data_vec_rhs);
251 |                         } else {
252 |                             self_next_node.word_end_data = Some(data_vec_rhs);
253 |                         }
254 |                     }
255 | 
256 |                     self_next_node += rhs_next_node;
257 |                     self.children.insert(char, self_next_node);
258 |                 }
259 |                 // Self doesn't contain the character, no conflict arises.
260 |                 // The whole 'rhs' node is just moved from 'rhs' into self.
261 |                 None => {
262 |                     self.children.insert(char, rhs_next_node);
263 |                 }
264 |             }
265 |         }
266 |     }
267 | }
268 | 
269 | impl<D: PartialEq> PartialEq for TrieDataNode<D> {
270 |     /// Operation == can be applied only to TrieNodes whose data implements PartialEq.
271 |     fn eq(&self, other: &Self) -> bool {
272 |         // If keys aren't equal, nodes aren't equal.
273 |         if !(self.children.len() == other.children.len()
274 |             && self.children.keys().all(|k| other.children.contains_key(k)))
275 |         {
276 |             return false;
277 |         }
278 | 
279 |         // If associations aren't equal, two nodes aren't equal.
280 |         if !match (&self.word_end_data, &other.word_end_data) {
281 |             (Some(self_vec), Some(other_vec)) => {
282 |                 // If they both have an association, return true only if the data is identical
283 |                 self_vec.len() == other_vec.len() && self_vec.iter().all(|k| other_vec.contains(k))
284 |             }
285 |             // If they both don't have an association, return true
286 |             (None, None) => true,
287 |             _ => false,
288 |         } {
289 |             return false;
290 |         }
291 | 
292 |         // Every child node that has the same key (character) must be equal.
293 |         self.children
294 |             .iter()
295 |             .map(|(char, self_child)| (self_child, other.children.get(char).unwrap()))
296 |             .all(|(self_child, other_child)| other_child == self_child)
297 |     }
298 | }
299 | 


--------------------------------------------------------------------------------
/src/trie/regular_trie.rs:
--------------------------------------------------------------------------------
  1 | use std::cmp::Ordering;
  2 | use std::ops;
  3 | 
  4 | use arrayvec::ArrayString;
  5 | #[cfg(feature = "serde")]
  6 | use serde_crate::{Deserialize, Serialize};
  7 | 
  8 | use crate::trie::get_characters;
  9 | use crate::trie_node::TrieDatalessNode;
 10 | 
/// Trie that stores words only, without any data attached to them.
#[derive(Debug, Default, Clone)]
#[cfg_attr(
    feature = "serde",
    derive(Serialize, Deserialize),
    serde(crate = "serde_crate")
)]
pub struct Trie {
    /// Root node; the path to it spells the empty string.
    root: TrieDatalessNode,
    /// Number of words stored in the trie, kept in sync by the
    /// mutating operations.
    len: usize,
}
 21 | 
 22 | impl Trie {
 23 |     pub fn new() -> Self {
 24 |         Trie {
 25 |             root: TrieDatalessNode::new(),
 26 |             len: 0,
 27 |         }
 28 |     }
 29 | 
 30 |     /// Insert a word into the trie, with no corresponding data.
 31 |     ///
 32 |     /// # Examples
 33 |     ///
 34 |     /// ```
 35 |     /// use basic_trie::Trie;
 36 |     /// let mut trie = Trie::new();
 37 |     ///
 38 |     /// trie.insert("word1");
 39 |     /// assert_eq!(vec![String::from("word1")], trie.get_all());
 40 |     /// ```
 41 |     pub fn insert(&mut self, word: &str) {
 42 |         let characters = get_characters(word);
 43 |         let mut current = &mut self.root;
 44 | 
 45 |         for character in characters {
 46 |             current = current
 47 |                 .children
 48 |                 .entry(ArrayString::from(character).unwrap())
 49 |                 .or_default();
 50 |         }
 51 | 
 52 |         if !current.is_associated() {
 53 |             self.len += 1;
 54 |         }
 55 | 
 56 |         current.associate();
 57 |     }
 58 | 
 59 |     /// Removes a word from the trie.
 60 |     /// If the word is a prefix to some word, some word
 61 |     /// isn't removed from the trie.
 62 |     ///
 63 |     /// # Examples
 64 |     ///
 65 |     /// ```
 66 |     /// use basic_trie::Trie;
 67 |     /// let mut trie = Trie::new();
 68 |     ///
 69 |     /// trie.insert("word");
 70 |     /// trie.insert("wording");
 71 |     ///
 72 |     /// trie.remove("word");
 73 |     /// assert_eq!(vec![String::from("wording")], trie.get("word").unwrap());
 74 |     ///
 75 |     /// trie.remove("wording");
 76 |     /// assert_eq!(Vec::<String>::new(), trie.get_all());
 77 |     /// ```
 78 |     pub fn remove(&mut self, word: &str) {
 79 |         let Some(current) = self.get_final_node_mut(word) else {
 80 |             return;
 81 |         };
 82 | 
 83 |         let characters = get_characters(word);
 84 | 
 85 |         if !current.children.is_empty() {
 86 |             return if current.is_associated() {
 87 |                 current.disassociate();
 88 |                 self.len -= 1;
 89 |             };
 90 |         }
 91 | 
 92 |         self.root.remove_one_word(characters.into_iter());
 93 |         self.len -= 1;
 94 |     }
 95 | 
 96 |     /// Removes every word that begins with 'prefix'.
 97 |     /// Not including the word 'prefix' if it's present.
 98 |     ///
 99 |     /// # Examples
100 |     ///
101 |     /// ```
102 |     /// use basic_trie::Trie;
103 |     /// let mut trie = Trie::new();
104 |     ///
105 |     /// trie.insert("eat");
106 |     /// trie.insert("eats");
107 |     /// trie.insert("eating");
108 |     /// trie.insert("eatings");
109 |     /// trie.insert("ea");
110 |     ///
111 |     /// trie.remove_prefix("ea");
112 |     ///
113 |     /// assert_eq!(vec![String::from("ea")], trie.get_all());
114 |     /// ```
115 |     pub fn remove_prefix(&mut self, prefix: &str) {
116 |         let Some(current) = self.get_final_node_mut(prefix) else {
117 |             return;
118 |         };
119 | 
120 |         // (current.is_associated() as usize) is added (subtracted twice) to
121 |         // not remove the current word from the count. Literal '1' is not used
122 |         // because of calling this function on the root node where 1 should
123 |         // not be added.
124 |         self.len -= current.remove_all_words() - (current.is_associated() as usize);
125 |     }
126 | 
127 |     /// Returns an option enum with a vector of owned strings
128 |     /// representing all found words that begin with 'query'.
129 |     /// If the word 'query' doesn't exist, None is returned.
130 |     ///
131 |     /// # Examples
132 |     ///
133 |     /// ```
134 |     /// use basic_trie::Trie;
135 |     /// let mut trie = Trie::new();
136 |     ///
137 |     /// trie.insert("word1");
138 |     /// trie.insert("word2");
139 |     ///
140 |     /// let all_correct_words = vec![String::from("word1"), String::from("word2")];
141 |     /// let mut found_words = trie.get("word").unwrap();
142 |     /// found_words.sort();
143 |     /// assert_eq!(all_correct_words, found_words);
144 |     /// ```
145 |     pub fn get(&self, query: &str) -> Option<Vec<String>> {
146 |         let mut substring = String::new();
147 |         let mut current_node = &self.root;
148 |         let characters = get_characters(query);
149 | 
150 |         for character in characters {
151 |             current_node = match current_node.children.get(character) {
152 |                 None => return None,
153 |                 Some(trie_node) => {
154 |                     substring.push_str(character);
155 |                     trie_node
156 |                 }
157 |             }
158 |         }
159 | 
160 |         let mut words_vec = Vec::new();
161 |         current_node.find_words(&substring, &mut words_vec);
162 | 
163 |         Some(words_vec)
164 |     }
165 | 
166 |     /// Returns the vector of longest words found in the trie.
167 |     ///
168 |     /// # Examples
169 |     ///
170 |     /// ```
171 |     /// use basic_trie::Trie;
172 |     /// let mut trie = Trie::new();
173 |     ///
174 |     /// trie.insert("shortwrd");
175 |     /// trie.insert("verylongword");
176 |     /// trie.insert("somelongword");
177 |     ///
178 |     /// let longest_words = vec![String::from("somelongword"), String::from("verylongword")];
179 |     /// let mut found_words = trie.get_longest();
180 |     /// found_words.sort();
181 |     /// assert_eq!(longest_words, found_words);
182 |     /// ```
183 |     pub fn get_longest(&self) -> Vec<String> {
184 |         let mut words = Vec::new();
185 |         self.root.words_min_max("", &mut words, Ordering::Greater);
186 |         words
187 |     }
188 | 
189 |     /// Returns the vector of shortest words found in the trie.
190 |     ///
191 |     /// # Examples
192 |     ///
193 |     /// ```
194 |     /// use basic_trie::Trie;
195 |     /// let mut trie = Trie::new();
196 |     ///
197 |     /// trie.insert("shortwrd");
198 |     /// trie.insert("rlyshort");
199 |     /// trie.insert("verylongword");
200 |     ///
201 |     /// let shortest_word = vec![String::from("rlyshort"), String::from("shortwrd")];
202 |     /// let mut found_words = trie.get_shortest();
203 |     /// found_words.sort();
204 |     /// assert_eq!(shortest_word, found_words);
205 |     /// ```
206 |     pub fn get_shortest(&self) -> Vec<String> {
207 |         let mut words = Vec::new();
208 |         self.root.words_min_max("", &mut words, Ordering::Less);
209 |         words
210 |     }
211 | 
212 |     /// Returns the number of words in the trie.
213 |     ///
214 |     /// # Examples
215 |     ///
216 |     /// ```
217 |     /// use basic_trie::Trie;
218 |     /// let mut trie = Trie::new();
219 |     ///
220 |     /// trie.insert("word1");
221 |     /// trie.insert("word2");
222 |     /// trie.insert("word3");
223 |     /// trie.insert("word4");
224 |     /// assert_eq!(4, trie.len());
225 |     ///
226 |     /// trie.remove("word1");
227 |     /// assert_eq!(3, trie.len());
228 |     ///
229 |     /// trie.remove_prefix("w");
230 |     /// assert_eq!(0, trie.len());
231 |     /// ```
232 |     pub fn len(&self) -> usize {
233 |         self.len
234 |     }
235 | 
236 |     /// Returns the number of words that start with 'prefix'.
237 |     /// If the sequence 'prefix' is not found, None is returned.
238 |     ///
239 |     /// # Examples
240 |     /// ```
241 |     /// use basic_trie::Trie;
242 |     /// let mut trie = Trie::new();
243 |     ///
244 |     /// trie.insert("word1");
245 |     /// trie.insert("word2");
246 |     /// trie.insert("word3");
247 |     /// trie.insert("word4");
248 |     /// trie.insert("word");
249 |     /// assert_eq!(4, trie.len_prefix("word"));
250 |     /// ```
251 |     pub fn len_prefix(&self, prefix: &str) -> usize {
252 |         match self.get_final_node(prefix) {
253 |             None => 0,
254 |             Some(node) => node.count_words() - node.is_associated() as usize,
255 |         }
256 |     }
257 | 
258 |     /// Returns an option enum with a vector of owned strings
259 |     /// representing all words in the trie.
260 |     /// Order is not guaranteed.
261 |     ///
262 |     /// # Examples
263 |     ///
264 |     /// ```
265 |     /// use basic_trie::Trie;
266 |     /// let mut trie = Trie::new();
267 |     ///
268 |     /// trie.insert("word1");
269 |     /// trie.insert("word2");
270 |     /// trie.insert("word3");
271 |     /// trie.insert("word4");
272 |     /// trie.insert("word5");
273 |     ///
274 |     /// let all_words = vec![
275 |     ///     String::from("word1"), String::from("word2"), String::from("word3"),
276 |     ///     String::from("word4"), String::from("word5")
277 |     /// ];
278 |     ///
279 |     /// let mut found_words = trie.get_all();
280 |     /// found_words.sort();
281 |     ///
282 |     /// assert_eq!(all_words, found_words);
283 |     /// ```
284 |     pub fn get_all(&self) -> Vec<String> {
285 |         self.get("").unwrap()
286 |     }
287 | 
288 |     /// Returns true if the trie contains 'query' as a word.
289 |     ///
290 |     /// # Examples
291 |     ///
292 |     /// ```
293 |     /// use basic_trie::Trie;
294 |     /// let mut trie = Trie::new();
295 |     ///
296 |     /// trie.insert("word");
297 |     /// assert!(trie.contains("word"));
298 |     /// assert!(!trie.contains("notfound"));
299 |     /// ```
300 |     pub fn contains(&self, query: &str) -> bool {
301 |         self.get_final_node(query)
302 |             .map_or(false, |node| node.is_associated())
303 |     }
304 | 
305 |     /// Returns true if no words are in the trie.
306 |     ///
307 |     /// # Examples
308 |     ///
309 |     /// ```
310 |     /// use basic_trie::Trie;
311 |     /// let mut trie = Trie::new();
312 |     ///
313 |     /// trie.insert("word");
314 |     /// trie.remove("word");
315 |     ///
316 |     /// assert!(trie.is_empty());
317 |     /// ```
318 |     pub fn is_empty(&self) -> bool {
319 |         self.len == 0
320 |     }
321 | 
322 |     /// Removes all words from the trie.
323 |     ///
324 |     /// # Examples
325 |     ///
326 |     /// ```
327 |     /// use basic_trie::Trie;
328 |     /// let mut trie = Trie::new();
329 |     ///
330 |     /// trie.insert("word1");
331 |     /// trie.insert("word2");
332 |     /// trie.insert("word3");
333 |     /// trie.insert("word4");
334 |     ///
335 |     /// trie.clear();
336 |     /// assert!(trie.is_empty());
337 |     /// assert_eq!(0, trie.len());
338 |     /// ```
339 |     pub fn clear(&mut self) {
340 |         self.root.clear_children();
341 |         self.len = 0;
342 |     }
343 | 
344 |     /// Function for getting the last node in a character sequence.
345 |     fn get_final_node(&self, query: &str) -> Option<&TrieDatalessNode> {
346 |         let mut current = &self.root;
347 | 
348 |         for character in get_characters(query) {
349 |             current = match current.children.get(character) {
350 |                 None => return None,
351 |                 Some(next_node) => next_node,
352 |             }
353 |         }
354 | 
355 |         Some(current)
356 |     }
357 | 
358 |     /// Function for getting the last node in a character sequence (mutable).
359 |     fn get_final_node_mut(&mut self, query: &str) -> Option<&mut TrieDatalessNode> {
360 |         let mut current = &mut self.root;
361 | 
362 |         for character in get_characters(query) {
363 |             current = match current.children.get_mut(character) {
364 |                 None => return None,
365 |                 Some(next_node) => next_node,
366 |             }
367 |         }
368 | 
369 |         Some(current)
370 |     }
371 | }
372 | 
373 | impl ops::Add for Trie {
374 |     type Output = Trie;
375 | 
376 |     /// Operation + merges two tries, leaving out duplicate words.
377 |     /// The smaller trie is always added to the larger one for efficiency.
378 |     ///
379 |     /// # Examples
380 |     ///
381 |     /// ```
382 |     /// use basic_trie::Trie;
383 |     /// let mut trie_1 = Trie::new();
384 |     /// trie_1.insert("word1");
385 |     /// trie_1.insert("word2");
386 |     /// trie_1.insert("word");
387 |     ///
388 |     /// let mut trie_2 = Trie::new();
389 |     /// trie_2.insert("word3");
390 |     /// trie_2.insert("word");
391 |     ///
392 |     /// let mut correct = Trie::new();
393 |     /// correct.insert("word");
394 |     /// correct.insert("word1");
395 |     /// correct.insert("word2");
396 |     /// correct.insert("word3");
397 |     ///
398 |     /// let trie_3 = trie_1 + trie_2;
399 |     ///
400 |     /// assert_eq!(trie_3, correct);
401 |     /// ```
402 |     fn add(self, rhs: Self) -> Self::Output {
403 |         let (smaller, mut bigger) = if self.len < rhs.len {
404 |             (self, rhs)
405 |         } else {
406 |             (rhs, self)
407 |         };
408 | 
409 |         bigger.root += smaller.root;
410 | 
411 |         // Number of words needs to be recalculated.
412 |         bigger.len = bigger.root.count_words();
413 | 
414 |         bigger
415 |     }
416 | }
417 | 
418 | impl ops::AddAssign for Trie {
419 |     /// Operation += merges two tries, leaving out duplicate words.
420 |     ///
421 |     /// # Examples
422 |     ///
423 |     /// ```
424 |     /// use basic_trie::Trie;
425 |     /// let mut trie_1 = Trie::new();
426 |     /// trie_1.insert("word1");
427 |     /// trie_1.insert("word2");
428 |     /// trie_1.insert("word");
429 |     ///
430 |     /// let mut trie_2 = Trie::new();
431 |     /// trie_2.insert("word3");
432 |     /// trie_2.insert("word");
433 |     ///
434 |     /// let mut correct = Trie::new();
435 |     /// correct.insert("word");
436 |     /// correct.insert("word1");
437 |     /// correct.insert("word2");
438 |     /// correct.insert("word3");
439 |     ///
440 |     /// trie_1 += trie_2;
441 |     ///
442 |     /// assert_eq!(trie_1, correct);
443 |     /// ```
444 |     fn add_assign(&mut self, rhs: Self) {
445 |         self.root += rhs.root;
446 | 
447 |         // Number of words needs to be recalculated.
448 |         self.len = self.root.count_words();
449 |     }
450 | }
451 | 
452 | impl PartialEq for Trie {
453 |     /// # Examples
454 |     ///
455 |     /// ```
456 |     /// use basic_trie::Trie;
457 |     /// let mut trie_1 = Trie::new();
458 |     /// trie_1.insert("test");
459 |     ///
460 |     /// let mut trie_2 = Trie::new();
461 |     /// trie_2.insert("test");
462 |     ///
463 |     /// assert_eq!(trie_1, trie_2);
464 |     ///
465 |     /// trie_2.insert("test2");
466 |     ///
467 |     /// assert_ne!(trie_1, trie_2);
468 |     /// ```
469 |     fn eq(&self, other: &Self) -> bool {
470 |         self.len == other.len && self.root == other.root
471 |     }
472 | }
473 | 


--------------------------------------------------------------------------------
/src/trie/data_trie.rs:
--------------------------------------------------------------------------------
  1 | use crate::trie::get_characters;
  2 | use crate::trie_node::TrieDataNode;
  3 | use arrayvec::ArrayString;
  4 | use std::cmp::Ordering;
  5 | use std::ops;
  6 | 
  7 | #[cfg(feature = "serde")]
  8 | use serde_crate::{Deserialize, Serialize};
  9 | 
 10 | #[derive(Debug, Default, Clone)]
 11 | #[cfg_attr(
 12 |     feature = "serde",
 13 |     derive(Serialize, Deserialize),
 14 |     serde(crate = "serde_crate")
 15 | )]
 16 | pub struct DataTrie<D> {
 17 |     root: TrieDataNode<D>,
 18 |     len: usize,
 19 | }
 20 | 
 21 | impl<D> DataTrie<D> {
 22 |     /// Returns a new instance of the trie.
 23 |     pub fn new() -> Self {
 24 |         DataTrie {
 25 |             root: TrieDataNode::new(),
 26 |             len: 0,
 27 |         }
 28 |     }
 29 | 
 30 |     /// Insert a word into the trie, with the corresponding data.
 31 |     ///
 32 |     /// # Examples
 33 |     ///
 34 |     /// ```
 35 |     /// use basic_trie::DataTrie;
 36 |     /// let mut trie = DataTrie::new();
 37 |     ///
 38 |     /// trie.insert("word1", "somedata");
 39 |     /// assert_eq!(vec![String::from("word1")], trie.get_all());
 40 |     /// ```
 41 |     pub fn insert(&mut self, word: &str, associated_data: D) {
 42 |         let characters = get_characters(word);
 43 |         let mut current = &mut self.root;
 44 | 
 45 |         for character in characters {
 46 |             current = current
 47 |                 .children
 48 |                 .entry(ArrayString::from(character).unwrap())
 49 |                 .or_insert_with(TrieDataNode::new);
 50 |         }
 51 | 
 52 |         if !current.is_associated() {
 53 |             self.len += 1;
 54 |             current.associate();
 55 |         }
 56 | 
 57 |         current.push_data(associated_data);
 58 |     }
 59 | 
 60 |     /// Insert a word into the trie, with no corresponding data.
 61 |     /// This function is very different from inserting a word into
 62 |     /// a regular trie, since it enables later attachment of data
 63 |     /// onto the inserted word. Type of trie must be annotated if
 64 |     /// this is the first function call.
 65 |     ///
 66 |     /// # Examples
 67 |     ///
 68 |     /// ```
 69 |     /// use basic_trie::DataTrie;
 70 |     /// let mut trie = DataTrie::<&str>::new();
 71 |     ///
 72 |     /// trie.insert_no_data("word1");
 73 |     /// assert_eq!(vec![String::from("word1")], trie.get_all());
 74 |     ///
 75 |     /// trie.insert("word1", "somedata");
 76 |     /// assert_eq!(vec![&"somedata"], trie.get_data("word1", false).unwrap());
 77 |     /// ```
 78 |     pub fn insert_no_data(&mut self, word: &str) {
 79 |         let characters = get_characters(word);
 80 |         let mut current = &mut self.root;
 81 | 
 82 |         for character in characters {
 83 |             current = current
 84 |                 .children
 85 |                 .entry(ArrayString::from(character).unwrap())
 86 |                 .or_insert_with(TrieDataNode::new);
 87 |         }
 88 | 
 89 |         if !current.is_associated() {
 90 |             self.len += 1;
 91 |             current.associate();
 92 |         }
 93 |     }
 94 | 
 95 |     /// Removes a word from the trie and returns data associated with that word.
 96 |     /// If the word is a prefix to some word, some word isn't removed from the trie.
 97 |     /// If the word is not found, None is returned.
 98 |     ///
 99 |     /// # Examples
100 |     ///
101 |     /// ```
102 |     /// use basic_trie::DataTrie;
103 |     /// let mut trie = DataTrie::new();
104 |     ///
105 |     /// trie.insert("word", "somedata");
106 |     /// trie.insert("wording", "somedata2");
107 |     ///
108 |     /// let removed_data1 = trie.remove("word");
109 |     /// assert_eq!(vec![String::from("wording")], trie.get("word").unwrap());
110 |     /// assert_eq!(vec![&"somedata2"], trie.get_data("word", true).unwrap());
111 |     /// assert_eq!(vec!["somedata"], removed_data1.unwrap());
112 |     ///
113 |     /// let removed_data2 = trie.remove("wording");
114 |     /// assert_eq!(Vec::<String>::new(), trie.get_all());
115 |     /// assert_eq!(vec!["somedata2"], removed_data2.unwrap());
116 |     /// ```
117 |     pub fn remove(&mut self, word: &str) -> Option<Vec<D>> {
118 |         let current = self.get_final_node_mut(word)?;
119 | 
120 |         if !current.children.is_empty() {
121 |             return current.clear_word_end_association(false).map(|data_vec| {
122 |                 self.len -= 1;
123 |                 data_vec.into_iter().collect()
124 |             });
125 |         }
126 | 
127 |         let characters = get_characters(word);
128 | 
129 |         self.root
130 |             .remove_one_word(characters.into_iter())
131 |             .data
132 |             .map_or(Some(Vec::new()), |data_vec| {
133 |                 self.len -= 1;
134 |                 Some(data_vec.into_iter().collect())
135 |             })
136 |     }
137 | 
138 |     /// Removes every word that begins with 'prefix' and collects all removed data.
139 |     /// Not including the word 'prefix' if it's present.
140 |     /// If the sequence 'prefix' is not found, None is returned.
141 |     ///
142 |     /// # Examples
143 |     ///
144 |     /// ```
145 |     /// use basic_trie::DataTrie;
146 |     /// let mut trie = DataTrie::new();
147 |     ///
148 |     /// trie.insert("eat", "somedata");
149 |     /// trie.insert("eats", "somedata2");
150 |     /// trie.insert("eating", "somedata3");
151 |     /// trie.insert("eatings", "somedata4");
152 |     /// trie.insert("ea", "somedata5");
153 |     ///
154 |     /// let mut removed_data = trie.remove_prefix("ea").unwrap();
155 |     /// removed_data.sort();
156 |     ///
157 |     /// assert_eq!(vec![String::from("ea")], trie.get_all());
158 |     /// assert_eq!(vec!["somedata", "somedata2", "somedata3", "somedata4"], removed_data);
159 |     /// ```
160 |     pub fn remove_prefix(&mut self, prefix: &str) -> Option<Vec<D>> {
161 |         let current = self.get_final_node_mut(prefix)?;
162 | 
163 |         let mut data_vec = Vec::new();
164 | 
165 |         // Sum must be applied to the node's children and not to the node
166 |         // itself because the recursive function must disassociate a node
167 |         // to put its data in the vector. The optimization of adding one
168 |         // to the count when the node in question isn't root can't be used
169 |         // since the original node would've been already disassociated therefore
170 |         // not accounted for in self.len.
171 |         let word_count = current
172 |             .children
173 |             .values_mut()
174 |             .map(|child| child.remove_all_words_collect(&mut data_vec))
175 |             .sum::<usize>();
176 |         current.clear_children();
177 | 
178 |         self.len -= word_count;
179 | 
180 |         Some(data_vec)
181 |     }
182 | 
183 |     /// Returns a vector of references to data of some word or references
184 |     /// to all found data of some word prefix when 'soft_match' is set to true.
185 |     /// If the word is not found and 'soft_match' is set to false, None is returned.
186 |     ///
187 |     /// # Examples
188 |     ///
189 |     /// ```
190 |     /// use basic_trie::DataTrie;
191 |     /// let mut trie = DataTrie::new();
192 |     ///
193 |     /// trie.insert("word1", "somedata");
194 |     /// trie.insert("word2", "somemoredata");
195 |     ///
196 |     /// let hard_data = vec![&"somedata"];
197 |     /// assert_eq!(hard_data, trie.get_data("word1", false).unwrap());
198 |     ///
199 |     /// let soft_data = vec![&"somedata", &"somemoredata"];
200 |     /// let mut found_data = trie.get_data("word", true).unwrap();
201 |     /// found_data.sort();
202 |     /// assert_eq!(soft_data, found_data);
203 |     /// ```
204 |     pub fn get_data(&self, query: &str, soft_match: bool) -> Option<Vec<&D>> {
205 |         let current = self.get_final_node(query)?;
206 | 
207 |         return if soft_match {
208 |             let mut soft_match_data = Vec::new();
209 |             current.generate_all_data(&mut soft_match_data);
210 | 
211 |             Some(soft_match_data)
212 |         } else {
213 |             current
214 |                 .get_association()
215 |                 .as_ref()
216 |                 .map(|data_vec| data_vec.iter().collect())
217 |         };
218 |     }
219 | 
220 |     /// Returns a vector of mutable references to data of some word that equals 'query'
221 |     /// or mutable references to all found data of words that begin with 'query'
222 |     /// when 'soft_match' is set to true.
223 |     /// If the word is not found and 'soft_match' is set to false, None is returned.
224 |     ///
225 |     /// ```
226 |     /// use basic_trie::DataTrie;
227 |     /// let mut trie = DataTrie::new();
228 |     ///
229 |     /// trie.insert("word1", "somedata");
230 |     /// trie.insert("word2", "somemoredata");
231 |     /// trie.insert("word1", "evenmoredata");
232 |     ///
233 |     /// let mut found_data = trie.get_data_mut("word1", false).unwrap();
234 |     ///
235 |     /// *found_data[0] = "changeddata";
236 |     /// *found_data[1] = "bigchanges";
237 |     ///
238 |     /// let hard_data = vec![&"changeddata", &"bigchanges"];
239 |     /// assert_eq!(hard_data, trie.get_data("word1", false).unwrap());
240 |     ///
241 |     /// let soft_data = vec![&"0", &"1", &"2"];
242 |     /// let mut found_data_mut = trie.get_data_mut("word", true).unwrap();
243 |     /// found_data_mut.sort();
244 |     /// *found_data_mut[0] = "0";
245 |     /// *found_data_mut[1] = "1";
246 |     /// *found_data_mut[2] = "2";
247 |     /// assert_eq!(soft_data, found_data_mut);
248 |     /// ```
249 |     pub fn get_data_mut(&mut self, query: &str, soft_match: bool) -> Option<Vec<&mut D>> {
250 |         let current = self.get_final_node_mut(query)?;
251 | 
252 |         return if soft_match {
253 |             let mut soft_match_data = Vec::new();
254 |             current.generate_all_data_mut(&mut soft_match_data);
255 | 
256 |             Some(soft_match_data)
257 |         } else {
258 |             current
259 |                 .get_association_mut()
260 |                 .as_mut()
261 |                 .map(|data_vec| data_vec.iter_mut().collect())
262 |         };
263 |     }
264 | 
265 |     /// Clears and returns data of some word. If the word is not found returns None.
266 |     /// If there is no data associated to the word, an empty vector is returned.
267 |     ///
268 |     /// # Examples
269 |     ///
270 |     /// ```
271 |     /// use basic_trie::DataTrie;
272 |     /// let mut trie = DataTrie::new();
273 |     ///
274 |     /// trie.insert("word", "data1");
275 |     /// trie.insert("word", "data2");
276 |     /// trie.insert("word", "data3");
277 |     /// let found_data = trie.clear_data("word");
278 |     ///
279 |     /// assert_eq!(Vec::<&&str>::new(), trie.get_data("word", false).unwrap());
280 |     /// assert_eq!(vec!["data1", "data2", "data3"], found_data.unwrap());
281 |     /// ```
282 |     pub fn clear_data(&mut self, word: &str) -> Option<Vec<D>> {
283 |         let current = self.get_final_node_mut(word)?;
284 | 
285 |         current
286 |             .clear_word_end_association(true)
287 |             .map(|data_vec| data_vec.into_iter().collect())
288 |     }
289 | 
290 |     /// Returns an option enum with a vector of owned strings
291 |     /// representing all found words that begin with 'query'.
292 |     /// If the word 'query' doesn't exist, None is returned.
293 |     ///
294 |     /// # Examples
295 |     ///
296 |     /// ```
297 |     /// use basic_trie::DataTrie;
298 |     /// let mut data_trie = DataTrie::new();
299 |     ///
300 |     /// data_trie.insert("word1", 1);
301 |     /// data_trie.insert("word2", 2);
302 |     ///
303 |     /// let all_correct_words = vec![String::from("word1"), String::from("word2")];
304 |     /// let mut found_words = data_trie.get("word").unwrap();
305 |     /// found_words.sort();
306 |     /// assert_eq!(all_correct_words, found_words);
307 |     /// ```
308 |     pub fn get(&self, query: &str) -> Option<Vec<String>> {
309 |         let mut substring = String::new();
310 |         let mut current_node = &self.root;
311 |         let characters = get_characters(query);
312 | 
313 |         for character in characters {
314 |             current_node = match current_node.children.get(character) {
315 |                 None => return None,
316 |                 Some(trie_node) => {
317 |                     substring.push_str(character);
318 |                     trie_node
319 |                 }
320 |             }
321 |         }
322 | 
323 |         let mut words_vec = Vec::new();
324 |         current_node.find_words(&substring, &mut words_vec);
325 | 
326 |         Some(words_vec)
327 |     }
328 | 
329 |     /// Returns the vector of longest words found in the trie.
330 |     ///
331 |     /// # Examples
332 |     ///
333 |     /// ```
334 |     /// use basic_trie::DataTrie;
335 |     /// let mut data_trie = DataTrie::new();
336 |     ///
337 |     /// data_trie.insert("shortwrd", 1);
338 |     /// data_trie.insert("verylongword", 2);
339 |     /// data_trie.insert("somelongword", 2);
340 |     ///
341 |     /// let longest_words = vec![String::from("somelongword"), String::from("verylongword")];
342 |     /// let mut found_words = data_trie.get_longest();
343 |     /// found_words.sort();
344 |     /// assert_eq!(longest_words, found_words);
345 |     /// ```
346 |     pub fn get_longest(&self) -> Vec<String> {
347 |         let mut words = Vec::new();
348 |         self.root.words_min_max("", &mut words, Ordering::Greater);
349 |         words
350 |     }
351 | 
352 |     /// Returns the vector of shortest words found in the trie.
353 |     ///
354 |     /// # Examples
355 |     ///
356 |     /// ```
357 |     /// use basic_trie::DataTrie;
358 |     /// let mut data_trie = DataTrie::new();
359 |     ///
360 |     /// data_trie.insert("shortwrd", 1);
361 |     /// data_trie.insert("rlyshort", 2);
362 |     /// data_trie.insert("verylongword", 3);
363 |     ///
364 |     /// let shortest_word = vec![String::from("rlyshort"), String::from("shortwrd")];
365 |     /// let mut found_words = data_trie.get_shortest();
366 |     /// found_words.sort();
367 |     /// assert_eq!(shortest_word, found_words);
368 |     /// ```
369 |     pub fn get_shortest(&self) -> Vec<String> {
370 |         let mut words = Vec::new();
371 |         self.root.words_min_max("", &mut words, Ordering::Less);
372 |         words
373 |     }
374 | 
375 |     /// Returns the number of words in the trie.
376 |     ///
377 |     /// # Examples
378 |     ///
379 |     /// ```
380 |     /// use basic_trie::DataTrie;
381 |     /// let mut data_trie = DataTrie::new();
382 |     ///
383 |     /// data_trie.insert("word1", 1);
384 |     /// data_trie.insert("word2", 2);
385 |     /// data_trie.insert("word3", 3);
386 |     /// data_trie.insert("word4", 4);
387 |     /// assert_eq!(4, data_trie.len());
388 |     ///
389 |     /// data_trie.remove("word1");
390 |     /// assert_eq!(3, data_trie.len());
391 |     ///
392 |     /// data_trie.remove_prefix("w");
393 |     /// assert_eq!(0, data_trie.len());
394 |     /// ```
395 |     pub fn len(&self) -> usize {
396 |         self.len
397 |     }
398 | 
399 |     /// Returns the number of words that start with 'prefix'.
400 |     /// If the sequence 'prefix' is not found, None is returned.
401 |     ///
402 |     /// # Examples
403 |     /// ```
404 |     /// use basic_trie::DataTrie;
405 |     /// let mut data_trie = DataTrie::new();
406 |     ///
407 |     /// data_trie.insert("word1", 1);
408 |     /// data_trie.insert("word2", 2);
409 |     /// data_trie.insert("word3", 3);
410 |     /// data_trie.insert("word4", 4);
411 |     /// data_trie.insert("word", 0);
412 |     /// assert_eq!(4, data_trie.len_prefix("word"));
413 |     /// ```
414 |     pub fn len_prefix(&self, prefix: &str) -> usize {
415 |         match self.get_final_node(prefix) {
416 |             None => 0,
417 |             Some(node) => node.count_words() - node.is_associated() as usize,
418 |         }
419 |     }
420 | 
421 |     /// Returns an option enum with a vector of owned strings
422 |     /// representing all words in the trie.
423 |     /// Order is not guaranteed.
424 |     ///
425 |     /// # Examples
426 |     ///
427 |     /// ```
428 |     /// use basic_trie::DataTrie;
429 |     /// let mut data_trie = DataTrie::new();
430 |     ///
431 |     /// data_trie.insert("word1", 1);
432 |     /// data_trie.insert("word2", 2);
433 |     /// data_trie.insert("word3", 3);
434 |     /// data_trie.insert("word4", 4);
435 |     /// data_trie.insert("word5", 5);
436 |     ///
437 |     /// let all_words = vec![
438 |     ///     String::from("word1"), String::from("word2"), String::from("word3"),
439 |     ///     String::from("word4"), String::from("word5")
440 |     /// ];
441 |     ///
442 |     /// let mut found_words = data_trie.get_all();
443 |     /// found_words.sort();
444 |     ///
445 |     /// assert_eq!(all_words, found_words);
446 |     /// ```
447 |     pub fn get_all(&self) -> Vec<String> {
448 |         self.get("").unwrap()
449 |     }
450 | 
451 |     /// Returns true if the trie contains 'query' as a word.
452 |     ///
453 |     /// # Examples
454 |     ///
455 |     /// ```
456 |     /// use basic_trie::DataTrie;
457 |     /// let mut data_trie = DataTrie::new();
458 |     ///
459 |     /// data_trie.insert("word", 0);
460 |     /// assert!(data_trie.contains("word"));
461 |     /// assert!(!data_trie.contains("notfound"));
462 |     /// ```
463 |     pub fn contains(&self, query: &str) -> bool {
464 |         self.get_final_node(query)
465 |             .map_or(false, |node| node.is_associated())
466 |     }
467 | 
468 |     /// Returns true if no words are in the trie.
469 |     ///
470 |     /// # Examples
471 |     ///
472 |     /// ```
473 |     /// use basic_trie::Trie;
474 |     /// let mut data_trie = Trie::new();
475 |     ///
476 |     /// data_trie.insert("word");
477 |     /// data_trie.remove("word");
478 |     ///
479 |     /// assert!(data_trie.is_empty());
480 |     /// ```
481 |     pub fn is_empty(&self) -> bool {
482 |         self.len == 0
483 |     }
484 | 
485 |     /// Removes all words from the trie.
486 |     ///
487 |     /// # Examples
488 |     ///
489 |     /// ```
490 |     /// use basic_trie::Trie;
491 |     /// let mut data_trie = Trie::new();
492 |     ///
493 |     /// data_trie.insert("word1");
494 |     /// data_trie.insert("word2");
495 |     /// data_trie.insert("word3");
496 |     /// data_trie.insert("word4");
497 |     ///
498 |     /// data_trie.clear();
499 |     /// assert!(data_trie.is_empty());
500 |     /// assert_eq!(0, data_trie.len());
501 |     /// ```
502 |     pub fn clear(&mut self) {
503 |         self.root.clear_children();
504 |         self.len = 0;
505 |     }
506 | 
507 |     /// Function for getting the last node in a character sequence.
508 |     fn get_final_node(&self, query: &str) -> Option<&TrieDataNode<D>> {
509 |         let mut current = &self.root;
510 | 
511 |         for character in get_characters(query) {
512 |             current = match current.children.get(character) {
513 |                 None => return None,
514 |                 Some(next_node) => next_node,
515 |             }
516 |         }
517 | 
518 |         Some(current)
519 |     }
520 | 
521 |     /// Function for getting the last node in a character sequence (mutable).
522 |     fn get_final_node_mut(&mut self, query: &str) -> Option<&mut TrieDataNode<D>> {
523 |         let mut current = &mut self.root;
524 | 
525 |         for character in get_characters(query) {
526 |             current = match current.children.get_mut(character) {
527 |                 None => return None,
528 |                 Some(next_node) => next_node,
529 |             }
530 |         }
531 | 
532 |         Some(current)
533 |     }
534 | }
535 | 
536 | impl<D> ops::Add for DataTrie<D> {
537 |     type Output = DataTrie<D>;
538 | 
539 |     /// Operation + merges two tries, leaving out duplicate words.
540 |     /// The smaller trie is always added to the larger one for efficiency.
541 |     ///
542 |     /// # Examples
543 |     ///
544 |     /// ```
545 |     /// use basic_trie::DataTrie;
546 |     /// let mut data_trie_1 = DataTrie::new();
547 |     /// data_trie_1.insert("word1", 1);
548 |     /// data_trie_1.insert("word2", 2);
549 |     /// data_trie_1.insert("word", 0);
550 |     ///
551 |     /// let mut data_trie_2 = DataTrie::new();
552 |     /// data_trie_2.insert("word3", 3);
553 |     /// data_trie_2.insert_no_data("word");
554 |     ///
555 |     /// let mut correct = DataTrie::new();
556 |     /// correct.insert("word", 0);
557 |     /// correct.insert("word1", 1);
558 |     /// correct.insert("word2", 2);
559 |     /// correct.insert("word3", 3);
560 |     ///
561 |     /// let data_trie_3 = data_trie_1 + data_trie_2;
562 |     ///
563 |     /// assert_eq!(data_trie_3, correct);
564 |     /// ```
565 |     fn add(self, rhs: Self) -> Self::Output {
566 |         let (smaller, mut bigger) = if self.len < rhs.len {
567 |             (self, rhs)
568 |         } else {
569 |             (rhs, self)
570 |         };
571 | 
572 |         bigger.root += smaller.root;
573 | 
574 |         // Number of words needs to be recalculated.
575 |         bigger.len = bigger.root.count_words();
576 | 
577 |         bigger
578 |     }
579 | }
580 | 
581 | impl<D> ops::AddAssign for DataTrie<D> {
582 |     /// Operation += merges two tries, leaving out duplicate words.
583 |     ///
584 |     /// # Examples
585 |     ///
586 |     /// ```
587 |     /// use basic_trie::DataTrie;
588 |     /// let mut data_trie_1 = DataTrie::new();
589 |     /// data_trie_1.insert("word1", 1);
590 |     /// data_trie_1.insert("word2", 2);
591 |     /// data_trie_1.insert("word", 0);
592 |     ///
593 |     /// let mut data_trie_2 = DataTrie::new();
594 |     /// data_trie_2.insert("word3", 3);
595 |     /// data_trie_2.insert_no_data("word");
596 |     ///
597 |     /// let mut correct = DataTrie::new();
598 |     /// correct.insert("word", 0);
599 |     /// correct.insert("word1", 1);
600 |     /// correct.insert("word2", 2);
601 |     /// correct.insert("word3", 3);
602 |     ///
603 |     /// data_trie_1 += data_trie_2;
604 |     ///
605 |     /// assert_eq!(data_trie_1, correct);
606 |     /// ```
607 |     fn add_assign(&mut self, rhs: Self) {
608 |         self.root += rhs.root;
609 | 
610 |         // Number of words needs to be recalculated.
611 |         self.len = self.root.count_words();
612 |     }
613 | }
614 | 
615 | impl<D: PartialEq> PartialEq for DataTrie<D> {
616 |     /// Operation '==' can be applied only to tries whose data implements PartialEq.
617 |     ///
618 |     /// # Examples
619 |     ///
620 |     /// ```
621 |     /// use basic_trie::DataTrie;
622 |     /// let mut data_trie_1 = DataTrie::new();
623 |     /// data_trie_1.insert("test", 1);
624 |     ///
625 |     /// let mut data_trie_2 = DataTrie::new();
626 |     /// data_trie_2.insert("test", 1);
627 |     ///
628 |     /// assert_eq!(data_trie_1, data_trie_2);
629 |     ///
630 |     /// data_trie_2.insert("test2", 2);
631 |     ///
632 |     /// assert_ne!(data_trie_1, data_trie_2);
633 |     /// ```
634 |     fn eq(&self, other: &Self) -> bool {
635 |         self.len == other.len && self.root == other.root
636 |     }
637 | }
638 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
   1 | //! # Basic Trie
   2 | //!
   3 | //! [![Test CI](https://github.com/lukascobbler/basic_trie/actions/workflows/rust.yml/badge.svg)](https://github.com/lukascobbler/basic_trie/actions/workflows/rust.yml)
   4 | //!
   5 | //! The trie data structure is used for quick access to words and
   6 | //! data that should (could) be associated with them.
   7 | //!
   8 | //! **Basic Trie** is implemented as a tree where each node holds a single character
   9 | //! that could point at any other character thus allowing insertion of arbitrary words.
  10 | //!
  11 | //! #### There are two major implementations:
  12 | //! - Trie where words are inserted with nothing attached to them
  13 | //! - Data Trie where each word has a corresponding vector of data attached to it
  14 | //!
  15 | //! Regular tries are often used for word lookups and prefix matching, and data tries are
  16 | //! often used for finding all data that is connected to some prefix.
  17 | //!
  18 | //! For example, when inserting a whole book in the trie, you could insert every word with
  19 | //! the corresponding page number it's on. Later when searching for the word, you could get all
  20 | //! the pages the word is on with no added performance cost.
  21 | //!
  22 | //! ## Global features
  23 | //! - insertion / removal of words
  24 | //! - fast contains check
  25 | //! - finding words based on a prefix
  26 | //! - longest / shortest words in the trie
  27 | //! - generic methods: `is_empty`, `len`, `clear`
  28 | //! - Trie equality with `==`
  29 | //! - Trie merging with `+` or `+=`
  30 | //!
  31 | //! ## Data Trie features
  32 | //! - generic type implementation for associating a word to any type, with zero trait constraints
  33 | //! - finding data of words based on exact match or prefix
  34 | //!
  35 | //! ## Optional features
  36 | //! - unicode support via the 'unicode' feature with the `unicode-segmentation` crate (enabled by default)
  37 | //! - data trie support via the 'data' feature (enabled by default)
  38 | //! - serialization and deserialization via the 'serde' feature with the `serde` crate
  39 | //!
  40 | //! ## Dependencies
  41 | //! - `unicode-segmentation` (enabled by default)
  42 | //! - `serde` (only with 'serde' feature flag)
  43 | //! - `fxhash`
  44 | //! - `thin-vec`
  45 | //! - `arrayvec`
  46 | //!
  47 | //! ## License
  48 | //! The software is licensed under the MIT license.
  49 | //!
  50 | //! ## Examples
  51 | //!
  52 | //! ```rust
  53 | //!  use basic_trie::Trie;
  54 | //!
  55 | //!  let mut trie = Trie::new();
  56 | //!  trie.insert("eat");
  57 | //!  trie.insert("eating");
  58 | //!  trie.insert("wizard");
  59 | //!
  60 | //!  let mut found_longest_words = trie.get_longest();
  61 | //!  found_longest_words.sort();
  62 | //!
  63 | //!  assert!(trie.contains("wizard"));
  64 | //!  assert_eq!(vec![String::from("eating"), String::from("wizard")], found_longest_words);
  65 | //!  assert_eq!(vec![String::from("eat")], trie.get_shortest());
  66 | //!  assert_eq!(3, trie.len());
  67 | //!  ```
  68 | //!
  69 | //!  ```rust
  70 | //!  use basic_trie::DataTrie;
  71 | //!
  72 | //!  let mut data_trie = DataTrie::<u32>::new();
  73 | //!  data_trie.insert("apple", 1);
  74 | //!  data_trie.insert("apple", 2);
  75 | //!  data_trie.insert_no_data("banana");
  76 | //!  data_trie.insert("avocado", 15);
  77 | //!
  78 | //! let mut found_data = data_trie.get_data("apple", false).unwrap();
  79 | //! found_data.sort();
  80 | //! assert_eq!(vec![&1, &2], found_data);
  81 | //!
  82 | //! let mut found_data = data_trie.get_data("a", true).unwrap();
  83 | //! found_data.sort();
  84 | //! assert_eq!(vec![&1, &2, &15], found_data);
  85 | //!
  86 | //! assert_eq!(vec![15], data_trie.remove("avocado").unwrap());
  87 | //!  ```
  88 | //!
  89 | //! ## Changelog
//! - **2.0.0** – Major redesign: increased memory efficiency for the regular Trie (formerly the Dataless Trie);
//! changed API names to better match the standard library; split the two implementations code-wise,
//! which fixes the bug where the documentation did not render.
  93 | //! - **1.2.3** – Adding dependencies for even more memory layout optimisations.
  94 | //! - **1.2.2** – More memory optimisations with Box.
  95 | //! - **1.2.1** – Memory performance upgrade with Box. Mutable data retrieval.
  96 | //! - **1.2.0** – Equality and addition operators support between
  97 | //! same Trie types via `==`, `+` and `+=`.
  98 | //! - **1.1.1** – Adding `FxHashMap` dependency for boosted performance.
  99 | //! - **1.1.0** – Serialization with the `serde` crate and the 'serde' feature.
 100 | //! - **1.0.3** – Optimisation of `number_of_words()`. Removing lifetime requirements
 101 | //! for word insertion for much better flexibility at the same logical memory cost.
 102 | //! - **1.0.2** – Bug fixes.
 103 | //! - **1.0.1** – `insert_no_data()` for `DataTrie`. Bugfixes.
 104 | //! - **1.0.0** – Separation of `DataTrie` and `DatalessTrie`. Optimizing
 105 | //! performance for `DatalessTrie`. Incompatible with older versions.
 106 | //! - **<1.0.0** – Simple `Trie` with data and base features.
 107 | //!
 108 | mod trie;
 109 | mod trie_node;
 110 | 
 111 | #[cfg(feature = "data")]
 112 | pub use trie::DataTrie;
 113 | 
 114 | pub use trie::Trie;
 115 | 
 116 | // Tests that apply to both implementations;
 117 | // the regular `Trie` is used to keep the code less verbose.
 118 | #[cfg(test)]
 119 | mod general_trie_tests {
 120 |     //! Behaviour shared by both trie implementations, exercised through the regular `Trie`.
 121 | 
 122 |     use crate::Trie;
 123 | 
 124 |     /// Turns string literals into the owned `String`s that the trie getters return.
 125 |     fn owned(words: &[&str]) -> Vec<String> {
 126 |         words.iter().map(|&word| String::from(word)).collect()
 127 |     }
 128 | 
 129 |     /// A prefix lookup returns every inserted word starting with that prefix.
 130 |     #[test]
 131 |     fn find_words() {
 132 |         let mut trie = Trie::new();
 133 |         for word in ["word1", "word2", "word3"] {
 134 |             trie.insert(word);
 135 |         }
 136 | 
 137 |         let mut found_words = trie.get("word").unwrap();
 138 |         found_words.sort();
 139 | 
 140 |         assert_eq!(found_words, owned(&["word1", "word2", "word3"]));
 141 |     }
 142 | 
 143 |     /// `get_longest()` follows the longest word as longer words are inserted.
 144 |     #[test]
 145 |     fn longest_word() {
 146 |         let mut trie = Trie::new();
 147 | 
 148 |         // Every insertion here adds a strictly longer word, which is then the only longest one.
 149 |         for word in ["a", "aa", "aaa", "aaaa"] {
 150 |             trie.insert(word);
 151 |             assert_eq!(trie.get_longest(), owned(&[word]));
 152 |         }
 153 | 
 154 |         // Inserting an already present, shorter word must not change the result.
 155 |         trie.insert("a");
 156 |         assert_eq!(trie.get_longest(), owned(&["aaaa"]));
 157 |     }
 158 | 
 159 |     /// Words tied for the greatest length are all returned.
 160 |     #[test]
 161 |     fn multiple_longest_words() {
 162 |         let mut trie = Trie::new();
 163 |         trie.insert("abba");
 164 |         trie.insert("cddc");
 165 | 
 166 |         let mut longest = trie.get_longest();
 167 |         longest.sort();
 168 | 
 169 |         assert_eq!(owned(&["abba", "cddc"]), longest);
 170 |     }
 171 | 
 172 |     /// `get_shortest()` keeps returning the shortest word while longer ones are added.
 173 |     #[test]
 174 |     fn shortest_word() {
 175 |         let mut trie = Trie::new();
 176 | 
 177 |         // "a" goes in first and stays the shortest word after every later insertion,
 178 |         // including the repeated insertion of "a" at the end.
 179 |         for word in ["a", "aa", "aaa", "aaaa", "a"] {
 180 |             trie.insert(word);
 181 |             assert_eq!(trie.get_shortest(), owned(&["a"]));
 182 |         }
 183 |     }
 184 | 
 185 |     /// Words tied for the smallest length are all returned.
 186 |     #[test]
 187 |     fn multiple_shortest_words() {
 188 |         let mut trie = Trie::new();
 189 |         for word in ["aaa", "aaaa", "aa", "bb"] {
 190 |             trie.insert(word);
 191 |         }
 192 | 
 193 |         let mut shortest = trie.get_shortest();
 194 |         shortest.sort();
 195 | 
 196 |         assert_eq!(owned(&["aa", "bb"]), shortest);
 197 |     }
 198 | 
 199 |     /// `len()` counts the inserted words.
 200 |     #[test]
 201 |     fn number_of_words() {
 202 |         let mut trie = Trie::new();
 203 |         for word in ["a", "b", "c", "d"] {
 204 |             trie.insert(word);
 205 |         }
 206 | 
 207 |         assert_eq!(4, trie.len());
 208 |     }
 209 | 
 210 |     /// Inserting the same word twice still yields it only once.
 211 |     #[test]
 212 |     fn same_word_twice() {
 213 |         let mut trie = Trie::new();
 214 |         trie.insert("twice");
 215 |         trie.insert("twice");
 216 | 
 217 |         assert_eq!(vec!["twice"], trie.get("twice").unwrap());
 218 |     }
 219 | 
 220 |     /// `get_all()` returns every inserted word.
 221 |     #[test]
 222 |     fn all_words() {
 223 |         let words = ["a", "ab", "abc", "abcd"];
 224 | 
 225 |         let mut trie = Trie::new();
 226 |         for word in words {
 227 |             trie.insert(word);
 228 |         }
 229 | 
 230 |         assert_eq!(owned(&words), trie.get_all());
 231 |     }
 232 | 
 233 |     /// Same as `all_words`, but with non-ASCII (Cyrillic) words.
 234 |     #[cfg(feature = "unicode")]
 235 |     #[test]
 236 |     fn unicode() {
 237 |         let words = ["а", "аб", "абц", "абцд"];
 238 | 
 239 |         let mut trie = Trie::new();
 240 |         for word in words {
 241 |             trie.insert(word);
 242 |         }
 243 | 
 244 |         assert_eq!(owned(&words), trie.get_all());
 245 |     }
 246 | 
 247 |     /// `clear()` removes every word, leaving an empty trie.
 248 |     #[test]
 249 |     fn clear() {
 250 |         let mut trie = Trie::new();
 251 |         for word in ["word1", "word2", "word3", "word4", "word5"] {
 252 |             trie.insert(word);
 253 |         }
 254 |         assert_eq!(5, trie.len());
 255 | 
 256 |         trie.clear();
 257 | 
 258 |         // Without these checks the test could only fail if `clear()` panicked.
 259 |         assert!(trie.is_empty());
 260 |         assert_eq!(0, trie.len());
 261 |         assert_eq!(Vec::<String>::new(), trie.get_all());
 262 |     }
 263 | }
 285 | 
 286 | #[cfg(feature = "data")]
 287 | #[cfg(test)]
 288 | mod data_trie_tests {
 289 |     //! Tests for behaviour specific to `DataTrie`: data lookup, removal, equality, `+` and `+=`.
 290 | 
 291 |     use super::DataTrie;
 292 | 
 293 |     /// Turns string literals into the owned `String`s that the trie getters return.
 294 |     fn owned(words: &[&str]) -> Vec<String> {
 295 |         words.iter().map(|&word| String::from(word)).collect()
 296 |     }
 297 | 
 298 |     /// Builds a trie where every word of `words`, inserted in order, is given `data`.
 299 |     fn filled(words: &[&str], data: i32) -> DataTrie<i32> {
 300 |         let mut trie = DataTrie::new();
 301 |         for &word in words {
 302 |             trie.insert(word, data);
 303 |         }
 304 |         trie
 305 |     }
 306 | 
 307 |     /// Builds a trie from `(word, data)` pairs, inserted in order.
 308 |     fn from_pairs(pairs: &[(&str, i32)]) -> DataTrie<i32> {
 309 |         let mut trie = DataTrie::new();
 310 |         for &(word, data) in pairs {
 311 |             trie.insert(word, data);
 312 |         }
 313 |         trie
 314 |     }
 315 | 
 316 |     /// Returns all words of `trie` in sorted order.
 317 |     fn sorted_words(trie: &DataTrie<i32>) -> Vec<String> {
 318 |         let mut words = trie.get_all();
 319 |         words.sort();
 320 |         words
 321 |     }
 322 | 
 323 |     /// The three numbered words shared by the lookup tests.
 324 |     fn numbered_words() -> DataTrie<i32> {
 325 |         from_pairs(&[("word1", 1), ("word2", 2), ("word3", 3)])
 326 |     }
 327 | 
 328 |     /// With soft matching, a prefix yields the data of every word that starts with it.
 329 |     #[test]
 330 |     fn find_data_soft_match() {
 331 |         let trie = numbered_words();
 332 | 
 333 |         let mut found_data = trie.get_data("word", true).unwrap();
 334 |         found_data.sort();
 335 | 
 336 |         assert_eq!(found_data, vec![&1, &2, &3]);
 337 |     }
 338 | 
 339 |     /// Soft matching also works when the stored data are string slices.
 340 |     #[test]
 341 |     fn find_str_data_soft_match() {
 342 |         let mut trie = DataTrie::new();
 343 |         for (word, data) in [("word1", "data1"), ("word2", "data2"), ("word3", "data3")] {
 344 |             trie.insert(word, data);
 345 |         }
 346 | 
 347 |         let mut found_data = trie.get_data("word", true).unwrap();
 348 |         found_data.sort();
 349 | 
 350 |         assert_eq!(found_data, vec![&"data1", &"data2", &"data3"]);
 351 |     }
 352 | 
 353 |     /// With hard matching, only the data of the exact word is returned.
 354 |     #[test]
 355 |     fn find_data_hard_match() {
 356 |         let trie = numbered_words();
 357 | 
 358 |         let mut found_data = trie.get_data("word1", false).unwrap();
 359 |         found_data.sort();
 360 | 
 361 |         assert_eq!(found_data, vec![&1]);
 362 |     }
 363 | 
 364 |     /// With hard matching, a prefix that is not itself a word yields `None`.
 365 |     #[test]
 366 |     fn find_data_hard_match_not_found() {
 367 |         let trie = numbered_words();
 368 | 
 369 |         assert_eq!(trie.get_data("word", false), None);
 370 |     }
 371 | 
 372 |     /// Data inserted for the same word accumulates in insertion order.
 373 |     #[test]
 374 |     fn same_word_twice_different_data() {
 375 |         let trie = from_pairs(&[("twice", 5), ("twice", 3)]);
 376 | 
 377 |         assert_eq!(vec![&5, &3], trie.get_data("twice", true).unwrap());
 378 |     }
 379 | 
 380 |     /// `clear_data()` hands back the data of a word; data inserted afterwards is kept.
 381 |     #[test]
 382 |     fn clear_word_data() {
 383 |         let mut trie = DataTrie::new();
 384 | 
 385 |         trie.insert("twice", 5);
 386 |         let cleared = trie.clear_data("twice");
 387 |         trie.insert("twice", 3);
 388 | 
 389 |         assert_eq!(vec![&3], trie.get_data("twice", true).unwrap());
 390 |         assert_eq!(vec![5], cleared.unwrap());
 391 |     }
 392 | 
 393 |     /// `clear_data()` on a word that was never inserted returns `None`.
 394 |     #[test]
 395 |     fn clear_word_no_data() {
 396 |         let mut trie = filled(&["word1"], 5);
 397 | 
 398 |         assert_eq!(None, trie.clear_data("word2"));
 399 |     }
 400 | 
 401 |     /// Removing the shortest word leaves the longer words on the same path intact.
 402 |     #[test]
 403 |     fn remove_word1() {
 404 |         let mut trie = filled(&["a", "ab", "abc", "abcd"], 5);
 405 | 
 406 |         trie.remove("a");
 407 | 
 408 |         // Compared without sorting, exactly as returned by `get_all()`.
 409 |         assert_eq!(owned(&["ab", "abc", "abcd"]), trie.get_all());
 410 |     }
 411 | 
 412 |     /// Removing the longest word of a chain leaves its prefixes intact.
 413 |     #[test]
 414 |     fn remove_word_final() {
 415 |         let mut trie = filled(&["a", "ab", "abc", "abcd"], 5);
 416 | 
 417 |         trie.remove("abcd");
 418 | 
 419 |         assert_eq!(owned(&["a", "ab", "abc"]), sorted_words(&trie));
 420 |     }
 421 | 
 422 |     /// Removing a word from the middle of a chain keeps both shorter and longer words.
 423 |     #[test]
 424 |     fn remove_word_2() {
 425 |         let mut trie = filled(&["a", "ab", "abc", "abcd"], 5);
 426 | 
 427 |         trie.remove("abc");
 428 | 
 429 |         assert_eq!(owned(&["a", "ab", "abcd"]), sorted_words(&trie));
 430 |         assert_eq!(vec![&5; 3], trie.get_data("a", true).unwrap());
 431 |     }
 432 | 
 433 |     /// Removing "eating" keeps "eatings", which continues on the same path.
 434 |     #[test]
 435 |     fn remove_word_3() {
 436 |         let mut trie = filled(&["eat", "eating", "eats", "eatings"], 5);
 437 | 
 438 |         trie.remove("eating");
 439 | 
 440 |         assert_eq!(owned(&["eat", "eatings", "eats"]), sorted_words(&trie));
 441 |     }
 442 | 
 443 |     /// Removing "eatings" keeps its prefix "eating".
 444 |     #[test]
 445 |     fn remove_word_4() {
 446 |         let mut trie = filled(&["eat", "eating", "eats", "eatings"], 5);
 447 | 
 448 |         trie.remove("eatings");
 449 | 
 450 |         assert_eq!(owned(&["eat", "eating", "eats"]), sorted_words(&trie));
 451 |     }
 452 | 
 453 |     /// Removing a prefix that is not a stored word changes nothing and returns `None`.
 454 |     #[test]
 455 |     fn remove_word_5() {
 456 |         let mut trie = filled(&["eat", "eating", "eats", "eatings"], 5);
 457 | 
 458 |         let removed = trie.remove("eatin");
 459 | 
 460 |         assert_eq!(owned(&["eat", "eating", "eatings", "eats"]), sorted_words(&trie));
 461 |         assert_eq!(None, removed);
 462 |     }
 463 | 
 464 |     /// Removing the longer of two words that share a prefix keeps the shorter one.
 465 |     #[test]
 466 |     fn remove_word_6() {
 467 |         let mut trie = filled(&["eat", "eatings"], 5);
 468 | 
 469 |         trie.remove("eatings");
 470 | 
 471 |         assert_eq!(owned(&["eat"]), sorted_words(&trie));
 472 |     }
 473 | 
 474 |     /// `remove()` returns the data that was stored for the removed word.
 475 |     #[test]
 476 |     fn remove_word_7() {
 477 |         let mut trie = from_pairs(&[("eat", 3), ("eatings", 5)]);
 478 | 
 479 |         let first_removed = trie.remove("eatings");
 480 | 
 481 |         assert_eq!(owned(&["eat"]), sorted_words(&trie));
 482 |         assert_eq!(vec![5], first_removed.unwrap());
 483 | 
 484 |         let second_removed = trie.remove("eat");
 485 | 
 486 |         assert_eq!(vec![3], second_removed.unwrap());
 487 |     }
 488 | 
 489 |     /// Removing one branch returns its data and leaves the data of the other words untouched.
 490 |     #[test]
 491 |     fn remove_word_8() {
 492 |         let mut trie = from_pairs(&[("eat", 3), ("eats", 4), ("eatings", 5)]);
 493 | 
 494 |         let removed = trie.remove("eats");
 495 | 
 496 |         assert_eq!(owned(&["eat", "eatings"]), sorted_words(&trie));
 497 |         assert_eq!(vec![4], removed.unwrap());
 498 | 
 499 |         let mut remaining_data = trie.get_data("eat", true).unwrap();
 500 |         remaining_data.sort();
 501 | 
 502 |         assert_eq!(vec![&3, &5], remaining_data);
 503 |     }
 504 | 
 505 |     /// `remove_prefix()` removes every longer word under the prefix but keeps the prefix word.
 506 |     #[test]
 507 |     fn remove_prefix_1() {
 508 |         let mut trie = from_pairs(&[
 509 |             ("eat", 3),
 510 |             ("eating", 4),
 511 |             ("eats", 5),
 512 |             ("eatings", 6),
 513 |             ("ea", 7),
 514 |         ]);
 515 | 
 516 |         let mut removed_data = trie.remove_prefix("ea").unwrap();
 517 |         removed_data.sort();
 518 | 
 519 |         assert_eq!(owned(&["ea"]), trie.get_all());
 520 |         assert_eq!(vec![3, 4, 5, 6], removed_data);
 521 |         assert_eq!(1, trie.len());
 522 |     }
 523 | 
 524 |     /// An empty prefix removes every word and returns all of the data.
 525 |     #[test]
 526 |     fn remove_prefix_2() {
 527 |         let mut trie = from_pairs(&[("a1", 3), ("b2", 4), ("c3", 5)]);
 528 | 
 529 |         let mut removed_data = trie.remove_prefix("").unwrap();
 530 |         removed_data.sort();
 531 | 
 532 |         assert_eq!(Vec::<String>::new(), trie.get_all());
 533 |         assert!(trie.is_empty());
 534 |         assert_eq!(0, trie.len());
 535 |         assert_eq!(vec![3, 4, 5], removed_data);
 536 |     }
 537 | 
 538 |     /// Soft matching collects data across non-ASCII (Cyrillic) words.
 539 |     #[cfg(feature = "unicode")]
 540 |     #[test]
 541 |     fn unicode_data() {
 542 |         let trie = filled(&["а", "аб", "абц", "абцд"], 5);
 543 | 
 544 |         assert_eq!(vec![&5; 4], trie.get_data("а", true).unwrap());
 545 |     }
 546 | 
 547 |     /// A word inserted without data can receive data later on.
 548 |     #[test]
 549 |     fn insert_no_data() {
 550 |         let mut trie = DataTrie::<&str>::new();
 551 | 
 552 |         trie.insert_no_data("word1");
 553 |         assert_eq!(owned(&["word1"]), trie.get_all());
 554 | 
 555 |         trie.insert("word1", "somedata");
 556 |         assert_eq!(vec![&"somedata"], trie.get_data("word1", false).unwrap());
 557 |     }
 558 | 
 559 |     /// Tries holding the same word and data compare equal.
 560 |     #[test]
 561 |     fn equals_1() {
 562 |         assert_eq!(filled(&["test"], 1), filled(&["test"], 1));
 563 |     }
 564 | 
 565 |     /// A trie is not equal to one that holds an additional word.
 566 |     #[test]
 567 |     fn equals_2() {
 568 |         let smaller = filled(&["test"], 1);
 569 |         let larger = filled(&["test", "test2"], 1);
 570 | 
 571 |         assert_ne!(smaller, larger);
 572 |     }
 573 | 
 574 |     /// Same as `equals_2`, with the larger trie on the left-hand side.
 575 |     #[test]
 576 |     fn equals_3() {
 577 |         let larger = filled(&["test", "test2"], 1);
 578 |         let smaller = filled(&["test"], 1);
 579 | 
 580 |         assert_ne!(larger, smaller);
 581 |     }
 582 | 
 583 |     /// `+` merges two tries with disjoint words into one holding all words and data.
 584 |     #[test]
 585 |     fn add_two_tries_1() {
 586 |         let t1 = filled(&["word1", "word2", "apple", "banana"], 1000);
 587 |         let t2 = filled(&["word3", "word4", "potato", "watermelon"], 1000);
 588 | 
 589 |         let t3 = t1 + t2;
 590 | 
 591 |         let correct = filled(
 592 |             &["word1", "word2", "apple", "banana", "word3", "word4", "potato", "watermelon"],
 593 |             1000,
 594 |         );
 595 | 
 596 |         assert_eq!(sorted_words(&t3), sorted_words(&correct));
 597 |         assert_eq!(t3, correct);
 598 | 
 599 |         let t3_data = t3.get_data("", true).unwrap();
 600 |         assert_eq!(t3_data, vec![&1000; 8]);
 601 |     }
 602 | 
 603 |     /// `+=` merges the right-hand trie into the left-hand one.
 604 |     #[test]
 605 |     fn add_two_tries_2() {
 606 |         let mut t1 = filled(&["word1", "word2", "apple", "banana"], 1000);
 607 |         let t2 = filled(&["word3", "word4", "potato", "watermelon"], 1000);
 608 | 
 609 |         t1 += t2;
 610 | 
 611 |         let correct = filled(
 612 |             &["word1", "word2", "apple", "banana", "word3", "word4", "potato", "watermelon"],
 613 |             1000,
 614 |         );
 615 | 
 616 |         assert_eq!(sorted_words(&t1), sorted_words(&correct));
 617 |         assert_eq!(t1, correct);
 618 | 
 619 |         let t1_data = t1.get_data("", true).unwrap();
 620 |         assert_eq!(t1_data, vec![&1000; 8]);
 621 |     }
 622 | 
 623 |     /// `+=` also works when the right-hand trie holds a prefix of a left-hand word.
 624 |     #[test]
 625 |     fn add_two_tries_3() {
 626 |         let mut t1 = filled(&["word1"], 500);
 627 |         let t2 = filled(&["word2", "word"], 500);
 628 | 
 629 |         t1 += t2;
 630 | 
 631 |         let correct = filled(&["word", "word1", "word2"], 500);
 632 | 
 633 |         assert_eq!(sorted_words(&t1), sorted_words(&correct));
 634 |         assert_eq!(t1, correct);
 635 | 
 636 |         let t1_data = t1.get_data("", true).unwrap();
 637 |         assert_eq!(t1_data, vec![&500; 3]);
 638 |     }
 639 | 
 640 |     /// `+=` with the same word on both sides keeps one word and all six data entries.
 641 |     #[test]
 642 |     fn add_two_tries_4() {
 643 |         let mut t1 = filled(&["word1", "word1", "word1"], 500);
 644 |         let t2 = filled(&["word1", "word1", "word1"], 500);
 645 | 
 646 |         t1 += t2;
 647 | 
 648 |         let correct = filled(&["word1"], 500);
 649 | 
 650 |         assert_eq!(sorted_words(&t1), sorted_words(&correct));
 651 | 
 652 |         let t1_data = t1.get_data("", true).unwrap();
 653 |         assert_eq!(t1_data, vec![&500; 6]);
 654 |     }
 655 | 
 656 |     /// Performs the same steps and checks as `add_two_tries_4`.
 657 |     #[test]
 658 |     fn add_two_tries_5() {
 659 |         let mut t1 = filled(&["word1", "word1", "word1"], 500);
 660 |         let t2 = filled(&["word1", "word1", "word1"], 500);
 661 | 
 662 |         t1 += t2;
 663 | 
 664 |         let correct = filled(&["word1"], 500);
 665 | 
 666 |         assert_eq!(sorted_words(&t1), sorted_words(&correct));
 667 | 
 668 |         let t1_data = t1.get_data("", true).unwrap();
 669 |         assert_eq!(t1_data, vec![&500; 6]);
 670 |     }
 671 | }
 833 | 
 834 | #[cfg(test)]
 835 | mod regular_trie_tests {
 836 |     //! Tests for the regular `Trie`: insertion, removal, equality, `+` and `+=`.
 837 | 
 838 |     use crate::Trie;
 839 | 
 840 |     /// Turns string literals into the owned `String`s that the trie getters return.
 841 |     fn owned(words: &[&str]) -> Vec<String> {
 842 |         words.iter().map(|&word| String::from(word)).collect()
 843 |     }
 844 | 
 845 |     /// Returns `words` in sorted order.
 846 |     fn sorted(mut words: Vec<String>) -> Vec<String> {
 847 |         words.sort();
 848 |         words
 849 |     }
 850 | 
 851 |     /// A prefix lookup returns every inserted word starting with that prefix.
 852 |     #[test]
 853 |     fn insert_no_data() {
 854 |         let mut trie = Trie::new();
 855 |         for word in ["word1", "word2", "word3"] {
 856 |             trie.insert(word);
 857 |         }
 858 | 
 859 |         let mut found_words = trie.get("word").unwrap();
 860 |         found_words.sort();
 861 | 
 862 |         assert_eq!(found_words, owned(&["word1", "word2", "word3"]));
 863 |     }
 864 | 
 865 |     /// Removing the shortest word leaves the longer words on the same path intact.
 866 |     #[test]
 867 |     fn remove_word1() {
 868 |         let mut trie = Trie::new();
 869 |         for word in ["a", "ab", "abc", "abcd"] {
 870 |             trie.insert(word);
 871 |         }
 872 | 
 873 |         trie.remove("a");
 874 | 
 875 |         // Compared without sorting, exactly as returned by `get_all()`.
 876 |         assert_eq!(owned(&["ab", "abc", "abcd"]), trie.get_all());
 877 |     }
 878 | 
 879 |     /// Removing the longest word of a chain leaves its prefixes intact.
 880 |     #[test]
 881 |     fn remove_word_final() {
 882 |         let mut trie = Trie::new();
 883 |         for word in ["a", "ab", "abc", "abcd"] {
 884 |             trie.insert(word);
 885 |         }
 886 | 
 887 |         trie.remove("abcd");
 888 | 
 889 |         assert_eq!(owned(&["a", "ab", "abc"]), sorted(trie.get_all()));
 890 |     }
 891 | 
 892 |     /// Removing a word from the middle of a chain keeps both shorter and longer words.
 893 |     #[test]
 894 |     fn remove_word_2() {
 895 |         let mut trie = Trie::new();
 896 |         for word in ["a", "ab", "abc", "abcd"] {
 897 |             trie.insert(word);
 898 |         }
 899 | 
 900 |         trie.remove("abc");
 901 | 
 902 |         assert_eq!(owned(&["a", "ab", "abcd"]), sorted(trie.get_all()));
 903 |     }
 904 | 
 905 |     /// Removing "eating" keeps "eatings", which continues on the same path.
 906 |     #[test]
 907 |     fn remove_word_3() {
 908 |         let mut trie = Trie::new();
 909 |         for word in ["eat", "eating", "eats", "eatings"] {
 910 |             trie.insert(word);
 911 |         }
 912 | 
 913 |         trie.remove("eating");
 914 | 
 915 |         assert_eq!(owned(&["eat", "eatings", "eats"]), sorted(trie.get_all()));
 916 |     }
 917 | 
 918 |     /// Removing "eatings" keeps its prefix "eating".
 919 |     #[test]
 920 |     fn remove_word_4() {
 921 |         let mut trie = Trie::new();
 922 |         for word in ["eat", "eating", "eats", "eatings"] {
 923 |             trie.insert(word);
 924 |         }
 925 | 
 926 |         trie.remove("eatings");
 927 | 
 928 |         assert_eq!(owned(&["eat", "eating", "eats"]), sorted(trie.get_all()));
 929 |     }
 930 | 
 931 |     /// Removing a prefix that is not a stored word leaves all words in place.
 932 |     #[test]
 933 |     fn remove_word_5() {
 934 |         let mut trie = Trie::new();
 935 |         for word in ["eat", "eating", "eats", "eatings"] {
 936 |             trie.insert(word);
 937 |         }
 938 | 
 939 |         trie.remove("eatin");
 940 | 
 941 |         assert_eq!(owned(&["eat", "eating", "eatings", "eats"]), sorted(trie.get_all()));
 942 |     }
 943 | 
 944 |     /// Removing the longer of two words that share a prefix keeps the shorter one.
 945 |     #[test]
 946 |     fn remove_word_6() {
 947 |         let mut trie = Trie::new();
 948 |         trie.insert("eat");
 949 |         trie.insert("eatings");
 950 | 
 951 |         trie.remove("eatings");
 952 | 
 953 |         assert_eq!(owned(&["eat"]), sorted(trie.get_all()));
 954 |     }
 955 | 
 956 |     /// Performs the same steps and checks as `remove_word_6`.
 957 |     #[test]
 958 |     fn remove_word_7() {
 959 |         let mut trie = Trie::new();
 960 |         trie.insert("eat");
 961 |         trie.insert("eatings");
 962 | 
 963 |         trie.remove("eatings");
 964 | 
 965 |         assert_eq!(owned(&["eat"]), sorted(trie.get_all()));
 966 |     }
 967 | 
 968 |     /// Removing one branch below "eat" keeps "eat" and the other branch.
 969 |     #[test]
 970 |     fn remove_word_8() {
 971 |         let mut trie = Trie::new();
 972 |         for word in ["eat", "eats", "eating"] {
 973 |             trie.insert(word);
 974 |         }
 975 | 
 976 |         trie.remove("eats");
 977 | 
 978 |         assert_eq!(owned(&["eat", "eating"]), sorted(trie.get_all()));
 979 |     }
 980 | 
 981 |     /// Removal from the middle of a chain also works for words made of digits.
 982 |     #[test]
 983 |     fn remove_word_9() {
 984 |         let mut trie = Trie::new();
 985 |         for word in ["123", "1234", "12345"] {
 986 |             trie.insert(word);
 987 |         }
 988 | 
 989 |         trie.remove("1234");
 990 | 
 991 |         assert_eq!(owned(&["123", "12345"]), sorted(trie.get_all()));
 992 |     }
 993 | 
 994 |     /// `remove_prefix()` removes every longer word under the prefix but keeps the prefix word.
 995 |     #[test]
 996 |     fn remove_prefix_1() {
 997 |         let mut trie = Trie::new();
 998 |         for word in ["eat", "eating", "eats", "eatings", "ea"] {
 999 |             trie.insert(word);
1000 |         }
1001 | 
1002 |         trie.remove_prefix("ea");
1003 | 
1004 |         assert_eq!(owned(&["ea"]), trie.get_all());
1005 |         assert_eq!(1, trie.len());
1006 |     }
1007 | 
1008 |     /// An empty prefix removes every word.
1009 |     #[test]
1010 |     fn remove_prefix_2() {
1011 |         let mut trie = Trie::new();
1012 |         for word in ["a1", "b2", "c3"] {
1013 |             trie.insert(word);
1014 |         }
1015 | 
1016 |         trie.remove_prefix("");
1017 | 
1018 |         assert_eq!(Vec::<String>::new(), trie.get_all());
1019 |         assert!(trie.is_empty());
1020 |         assert_eq!(0, trie.len());
1021 |     }
1022 | 
1023 |     /// Tries holding the same word compare equal.
1024 |     #[test]
1025 |     fn equals() {
1026 |         let mut left = Trie::new();
1027 |         left.insert("test");
1028 | 
1029 |         let mut right = Trie::new();
1030 |         right.insert("test");
1031 | 
1032 |         assert_eq!(left, right);
1033 |     }
1034 | 
1035 |     /// `+` merges two tries with disjoint words into one holding all of them.
1036 |     #[test]
1037 |     fn add_two_tries_1() {
1038 |         let mut t1 = Trie::new();
1039 |         for word in ["word1", "word2", "apple", "banana"] {
1040 |             t1.insert(word);
1041 |         }
1042 | 
1043 |         let mut t2 = Trie::new();
1044 |         for word in ["word3", "word4", "potato", "pineapple"] {
1045 |             t2.insert(word);
1046 |         }
1047 | 
1048 |         let t3 = t1 + t2;
1049 | 
1050 |         let mut correct = Trie::new();
1051 |         for word in [
1052 |             "word1", "word2", "apple", "banana", "word3", "word4", "potato", "pineapple",
1053 |         ] {
1054 |             correct.insert(word);
1055 |         }
1056 | 
1057 |         assert_eq!(sorted(t3.get_all()), sorted(correct.get_all()));
1058 |     }
1059 | 
1060 |     /// `+=` merges the right-hand trie into the left-hand one.
1061 |     #[test]
1062 |     fn add_two_tries_2() {
1063 |         let mut t1 = Trie::new();
1064 |         for word in ["word1", "word2", "apple", "banana"] {
1065 |             t1.insert(word);
1066 |         }
1067 | 
1068 |         let mut t2 = Trie::new();
1069 |         for word in ["word3", "word4", "potato", "watermelon"] {
1070 |             t2.insert(word);
1071 |         }
1072 | 
1073 |         t1 += t2;
1074 | 
1075 |         let mut correct = Trie::new();
1076 |         for word in [
1077 |             "word1", "word2", "apple", "banana", "word3", "word4", "potato", "watermelon",
1078 |         ] {
1079 |             correct.insert(word);
1080 |         }
1081 | 
1082 |         assert_eq!(sorted(t1.get_all()), sorted(correct.get_all()));
1083 |     }
1084 | 
1085 |     /// `+=` also works when the right-hand trie holds a prefix of a left-hand word.
1086 |     #[test]
1087 |     fn add_two_tries_3() {
1088 |         let mut t1 = Trie::new();
1089 |         t1.insert("word1");
1090 | 
1091 |         let mut t2 = Trie::new();
1092 |         t2.insert("word2");
1093 |         t2.insert("word");
1094 | 
1095 |         t1 += t2;
1096 | 
1097 |         let mut correct = Trie::new();
1098 |         for word in ["word", "word1", "word2"] {
1099 |             correct.insert(word);
1100 |         }
1101 | 
1102 |         assert_eq!(sorted(t1.get_all()), sorted(correct.get_all()));
1103 |     }
1104 | }
1190 | 


--------------------------------------------------------------------------------