/// Splits `word` into the "characters" that are used as keys inside the trie.
///
/// - With the 'unicode' feature enabled, the word is split into extended
///   grapheme clusters, i.e. user-perceived characters.
/// - Without it, the word is split into Unicode scalar values (`char`s). A
///   grapheme made of several scalar values (for example a base letter followed
///   by a combining mark) is then returned as several separate elements, which
///   may not be what callers working with such text expect.
///
/// Every returned slice borrows from `word`; an empty word yields an empty vector.
fn get_characters(word: &str) -> Vec<&str> {
    #[cfg(feature = "unicode")]
    return UnicodeSegmentation::graphemes(word, true).collect();

    #[cfg(not(feature = "unicode"))]
    {
        // One pass over the character boundaries. This yields exactly what
        // `word.split("")` yields once its leading and trailing empty strings
        // are dropped, without building an intermediate vector.
        word.char_indices()
            .map(|(start, character)| &word[start..start + character.len_utf8()])
            .collect()
    }
}
copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Basic Trie 2 | 3 | [![Test CI](https://github.com/lukascobbler/basic_trie/actions/workflows/rust.yml/badge.svg)](https://github.com/lukascobbler/basic_trie/actions/workflows/rust.yml) 4 | 5 | The trie data structure is used for quick access to words and 6 | data that should (could) be associated with them. 7 | 8 | **Basic Trie** is implemented as a tree where each node holds a single character 9 | that could point at any other character thus allowing insertion of arbitrary words. 
10 | 11 | ##### There are two major implementations: 12 | - Trie where words are inserted with nothing attached to them 13 | - Data Trie where each word has a corresponding vector of data attached to it 14 | 15 | Regular tries are often used for word lookups and prefix matching, and data tries are 16 | often used for finding all data that is connected to some prefix. 17 | 18 | For example, when inserting a whole book in the trie, you could insert every word with 19 | the corresponding page number it's on. Later when searching for the word, you could get all 20 | the pages the word is on with no added performance cost. 21 | 22 | ### Global features 23 | - insertion / removal of words 24 | - fast contains check 25 | - finding words based on a prefix 26 | - longest / shortest words in the trie 27 | - generic methods: `is_empty`, `len`, `clear` 28 | - Trie equality with `==` 29 | - Trie merging with `+` or `+=` 30 | 31 | ### Data Trie features 32 | - generic type implementation for associating a word to any type, with zero trait constraints 33 | - finding data of words based on exact match or prefix 34 | 35 | ### Optional features 36 | - unicode support via the 'unicode' feature with the `unicode-segmentation` crate (enabled by default) 37 | - data trie support via the 'data' feature (enabled by default) 38 | - serialization and deserialization via the 'serde' feature with the `serde` crate 39 | 40 | ### Dependencies 41 | - `unicode-segmentation` (enabled by default) 42 | - `serde` (only with 'serde' feature flag) 43 | - `fxhash` 44 | - `thin-vec` 45 | - `arrayvec` 46 | 47 | ### License 48 | The software is licensed under the MIT license. 
49 | 50 | ### Examples 51 | 52 | ```rust 53 | use basic_trie::Trie; 54 | 55 | let mut trie = Trie::new(); 56 | trie.insert("eat"); 57 | trie.insert("eating"); 58 | trie.insert("wizard"); 59 | 60 | let mut found_longest_words = trie.get_longest(); 61 | found_longest_words.sort(); 62 | 63 | assert!(trie.contains("wizard")); 64 | assert_eq!(vec![String::from("eating"), String::from("wizard")], found_longest_words); 65 | assert_eq!(vec![String::from("eat")], trie.get_shortest()); 66 | assert_eq!(3, trie.len()); 67 | ``` 68 | 69 | ```rust 70 | use basic_trie::DataTrie; 71 | 72 | let mut data_trie = DataTrie::<u32>::new(); 73 | data_trie.insert("apple", 1); 74 | data_trie.insert("apple", 2); 75 | data_trie.insert_no_data("banana"); 76 | data_trie.insert("avocado", 15); 77 | 78 | let mut found_data = data_trie.get_data("apple", false).unwrap(); 79 | found_data.sort(); 80 | assert_eq!(vec![&1, &2], found_data); 81 | 82 | let mut found_data = data_trie.get_data("a", true).unwrap(); 83 | found_data.sort(); 84 | assert_eq!(vec![&1, &2, &15], found_data); 85 | 86 | assert_eq!(vec![15], data_trie.remove("avocado").unwrap()); 87 | ``` 88 | 89 | ## Changelog 90 | - **2.0.0** - Major redesign: increased memory efficiency for the regular Trie (used to be Dataless Trie); 91 | Changed API names to better match the standard library; splitting the two implementations code-wise thus 92 | fixing the documentation not rendering bug. 93 | - **1.2.3** – Adding dependencies for even more memory layout optimisations. 94 | - **1.2.2** – More memory optimisations with Box. 95 | - **1.2.1** – Memory performance upgrade with Box. Mutable data retrieval. 96 | - **1.2.0** – Equality and addition operators support between 97 | same Trie types via `==`, `+` and `+=`. 98 | - **1.1.1** – Adding `FxHashMap` dependency for boosted performance. 99 | - **1.1.0** – Serialization with the `serde` crate and the 'serde' feature. 100 | - **1.0.3** – Optimisation of `number_of_words()`. 
Removing lifetime requirements 101 | for word insertion for much better flexibility at the same logical memory cost. 102 | - **1.0.2** – Bug fixes. 103 | - **1.0.1** – `insert_no_data()` for `DataTrie`. Bugfixes. 104 | - **1.0.0** – Separation of `DataTrie` and `DatalessTrie`. Optimizing 105 | performance for `DatalessTrie`. Incompatible with older versions. 106 | - **<1.0.0** – Simple `Trie` with data and base features. 107 | 108 | -------------------------------------------------------------------------------- /tests/complete_test_regular.rs: -------------------------------------------------------------------------------- 1 | use basic_trie::Trie; 2 | use growable_bloom_filter::GrowableBloom; 3 | use peak_alloc::PeakAlloc; 4 | use randomizer::Randomizer; 5 | use std::collections::HashMap; 6 | use std::collections::HashSet; 7 | use std::time::Instant; 8 | use unicode_segmentation::UnicodeSegmentation; 9 | 10 | #[global_allocator] 11 | static PEAK_ALLOC: PeakAlloc = PeakAlloc; 12 | 13 | pub struct BigData { 14 | pub number_of_words: usize, 15 | pub first_letter_histogram: HashMap, 16 | pub big_data: Vec, 17 | } 18 | 19 | pub fn generate_random_lines(x: usize, y: usize) -> BigData { 20 | let mut bloom_filter = GrowableBloom::new(0.01, x); 21 | let mut result = Vec::new(); 22 | let mut first_letter_histogram = HashMap::::new(); 23 | let mut number_of_words = 0; 24 | 25 | for _ in 0..x { 26 | let random_string = Randomizer::ALPHABETICAL_LOWER(y).string().unwrap(); 27 | if !bloom_filter.contains(&random_string) { 28 | bloom_filter.insert(&random_string); 29 | 30 | let first_letter = random_string[0..1].to_string(); 31 | *first_letter_histogram.entry(first_letter).or_insert(0) += 1; 32 | 33 | result.push(random_string); 34 | number_of_words += 1; 35 | } 36 | } 37 | 38 | BigData { 39 | number_of_words, 40 | first_letter_histogram, 41 | big_data: result, 42 | } 43 | } 44 | 45 | #[test] 46 | fn overall_regular() { 47 | let number_of_words = 500_000; 48 | let word_length = 
// Fills a trie with random words, clears it and checks that it reports itself
// as empty. The memory and timing figures are only printed for manual
// inspection; nothing is asserted about them.
#[test]
fn clearing_regular() {
    let number_of_words = 500_000;
    let word_length = 15;

    let big_data = generate_random_lines(number_of_words, word_length);

    println!(
        "Memory usage after loading words: {:.1}mb\n",
        PEAK_ALLOC.current_usage_as_mb()
    );

    let mut trie = Trie::new();

    // Iterating by value consumes the word list, so each word is dropped right
    // after it was inserted and does not count towards the later readings.
    for word in big_data.big_data {
        trie.insert(&word);
    }

    println!(
        "Memory usage after trie generation: {:.1}mb\n",
        PEAK_ALLOC.current_usage_as_mb()
    );

    // Only the clear and the emptiness check are timed.
    let now = Instant::now();

    trie.clear();
    assert!(trie.is_empty());

    let elapsed = now.elapsed();
    println!("Operations time: {:.2?}\n", elapsed);

    println!(
        "Memory usage after trie cleanup: {:.1}mb\n",
        PEAK_ALLOC.current_usage_as_mb()
    );
}
// Checks the `+` operator: two tries built from overlapping parts of the word
// list must add up to a trie that equals one built from the whole list.
// The memory and timing figures are only printed, never asserted.
#[test]
fn add_op_regular_2() {
    let number_of_words = 100_000;
    let word_length = 15;

    let big_data = generate_random_lines(number_of_words, word_length);

    let mut trie_0 = Trie::new();
    let mut trie_1 = Trie::new();
    let mut trie_2 = Trie::new();

    println!(
        "Memory usage after loading words: {:.1}mb\n",
        PEAK_ALLOC.current_usage_as_mb()
    );

    // Reference trie that holds every generated word.
    for line in big_data.big_data.iter() {
        trie_0.insert(&line);
    }

    // Reversing before skipping drops the LAST 20000 words, so `trie_1` holds
    // everything except the tail of the list.
    for line in big_data.big_data.iter().rev().skip(20000) {
        trie_1.insert(&line);
    }

    // `trie_2` holds everything except the FIRST 20000 words. The two tries
    // overlap in the middle and together cover the whole list, provided at
    // least 40000 words were generated.
    for line in big_data.big_data.iter().skip(20000) {
        trie_2.insert(&line);
    }

    println!(
        "Memory usage after trie generation: {:.1}mb",
        PEAK_ALLOC.current_usage_as_mb()
    );

    // Only the addition itself is timed.
    let now = Instant::now();

    let trie_3 = trie_1 + trie_2;
    let elapsed = now.elapsed();
    println!("Operations time: {:.2?}", elapsed);

    assert!(trie_0 == trie_3);

    let t3_words = trie_3.get_all();
    let correct_words = trie_0.get_all();

    // Second check on the word lists: no word of the reference trie may be
    // missing from the sum.
    let item_set: HashSet<_> = t3_words.iter().collect();
    let only_in_correct: Vec<_> = correct_words
        .into_iter()
        .filter(|item| !item_set.contains(item))
        .collect();

    assert_eq!(only_in_correct.len(), 0);

    println!(
        "Memory usage after trie addition: {:.1}mb\n",
        PEAK_ALLOC.current_usage_as_mb()
    );
}
| } 283 | -------------------------------------------------------------------------------- /src/trie_node/regular_node.rs: -------------------------------------------------------------------------------- 1 | use fxhash::FxHashMap; 2 | use std::cmp::Ordering; 3 | use std::ops; 4 | 5 | #[cfg(feature = "serde")] 6 | use serde_crate::{Deserialize, Serialize}; 7 | 8 | /// Singular trie node that represents its children and a marker for word ending. 9 | #[derive(Debug, Default, Clone)] 10 | #[cfg_attr( 11 | feature = "serde", 12 | derive(Serialize, Deserialize), 13 | serde(crate = "serde_crate") 14 | )] 15 | pub struct TrieDatalessNode { 16 | #[cfg_attr(feature = "serde", serde(rename = "c"))] 17 | pub(crate) children: Box, TrieDatalessNode>>, 18 | #[cfg_attr(feature = "serde", serde(rename = "we"))] 19 | word_end: bool, 20 | } 21 | 22 | impl TrieDatalessNode { 23 | /// Returns a new instance of a TrieNode. 24 | pub(crate) fn new() -> Self { 25 | TrieDatalessNode { 26 | children: Default::default(), 27 | word_end: false, 28 | } 29 | } 30 | 31 | /// Recursive function for inserting found words from the given node and 32 | /// given starting substring. 33 | pub(crate) fn find_words(&self, substring: &str, found_words: &mut Vec) { 34 | if self.is_associated() { 35 | found_words.push(substring.to_string()); 36 | } 37 | 38 | self.children.iter().for_each(|(character, node)| { 39 | node.find_words(&(substring.to_owned() + character), found_words) 40 | }); 41 | } 42 | 43 | /// The recursive function for finding a vector of shortest and longest words in the TrieNode consists of: 44 | /// - the DFS tree traversal part for getting to every child node; 45 | /// - matching lengths of found words in combination with the passed ordering. 
46 | pub(crate) fn words_min_max( 47 | &self, 48 | substring: &str, 49 | found_words: &mut Vec, 50 | ord: Ordering, 51 | ) { 52 | 'word: { 53 | if self.is_associated() { 54 | if let Some(found) = found_words.first() { 55 | match substring.len().cmp(&found.len()) { 56 | Ordering::Less if ord == Ordering::Less => { 57 | found_words.clear(); 58 | } 59 | Ordering::Greater if ord == Ordering::Greater => { 60 | found_words.clear(); 61 | } 62 | Ordering::Equal => (), 63 | _ => break 'word, 64 | } 65 | } 66 | found_words.push(substring.to_string()); 67 | } 68 | } 69 | 70 | self.children.iter().for_each(|(character, node)| { 71 | node.words_min_max(&(substring.to_owned() + character), found_words, ord) 72 | }); 73 | } 74 | 75 | /// Recursive function that drops all children maps 76 | /// regardless of having multiple words branching from them or not. 77 | /// Counts the number of words removed. 78 | pub(crate) fn remove_all_words(&mut self) -> usize { 79 | let num_removed = self 80 | .children 81 | .values_mut() 82 | .map(|child| child.remove_all_words()) 83 | .sum::() 84 | + self.is_associated() as usize; 85 | 86 | self.clear_children(); 87 | 88 | num_removed 89 | } 90 | 91 | /// Recursive function that counts the number of words from a starting node. 92 | pub(crate) fn count_words(&self) -> usize { 93 | self.children 94 | .values() 95 | .map(|child| child.count_words()) 96 | .sum::() 97 | + self.is_associated() as usize 98 | } 99 | 100 | /// Recursive function for removing and freeing memory of a word that is not needed anymore. 101 | /// The algorithm first finds the last node of a word given in the form of a character iterator, 102 | /// then it frees the maps and unwinds to the first node that should not be deleted. 
103 | /// The first node that should not be deleted is either: 104 | /// - the root node 105 | /// - the node that has multiple words branching from it 106 | /// - the node that represents an end to some word with the same prefix 107 | /// The last node's data is propagated all the way to the final return 108 | /// with the help of auxiliary 'RemoveData' struct. 109 | pub(crate) fn remove_one_word<'b>( 110 | &mut self, 111 | mut characters: impl Iterator, 112 | ) -> bool { 113 | let next_character = match characters.next() { 114 | None => { 115 | self.disassociate(); 116 | return false; 117 | } 118 | Some(char) => char, 119 | }; 120 | 121 | let next_node = self.children.get_mut(next_character).unwrap(); 122 | let must_keep = next_node.remove_one_word(characters); 123 | 124 | if self.children.len() > 1 || must_keep { 125 | return true; 126 | } 127 | self.clear_children(); 128 | 129 | self.is_associated() 130 | } 131 | 132 | /// Function marks the node as an end of a word. 133 | pub(crate) fn associate(&mut self) { 134 | self.word_end = true; 135 | } 136 | 137 | /// Function unmarks the node as an end of a word. 138 | pub(crate) fn disassociate(&mut self) { 139 | self.word_end = false; 140 | } 141 | 142 | pub(crate) fn is_associated(&self) -> bool { 143 | self.word_end 144 | } 145 | 146 | /// Function removes all children of a node. 147 | pub(crate) fn clear_children(&mut self) { 148 | self.children = Default::default(); 149 | } 150 | } 151 | 152 | impl ops::AddAssign for TrieDatalessNode { 153 | /// Overriding the += operator on nodes. 154 | /// Function adds two nodes based on the principle: 155 | /// for every child node and character in the 'rhs' node: 156 | /// - if the self node doesn't have that character in it's children map, 157 | /// simply move the pointer to the self's children map without any extra cost; 158 | /// - if the self node has that character, the node of that character (self's child) 159 | /// is added with the 'rhc's' node. 
160 | /// An edge case exists when the 'rhc's' node has an association but self's node doesn't. 161 | /// That association is handled based on the result of 'rhc_next_node.word_end'. 162 | /// On true, the self node vector is initialized with the 'rhc' node vector. 163 | fn add_assign(&mut self, rhs: Self) { 164 | for (char, rhs_next_node) in rhs.children.into_iter() { 165 | // Does self contain the character? 166 | match self.children.remove(&*char) { 167 | // The whole node is removed, as owned, operated on and returned in self's children. 168 | Some(mut self_next_node) => { 169 | // Edge case: associate self node if the other node is also associated 170 | // Example: when adding 'word' to 'word1', 'd' on 'word' needs to be associated 171 | if rhs_next_node.word_end { 172 | self_next_node.word_end = true; 173 | } 174 | 175 | self_next_node += rhs_next_node; 176 | self.children.insert(char, self_next_node); 177 | } 178 | // Self doesn't contain the character, no conflict arises. 179 | // The whole 'rhs' node is just moved from 'rhs' into self. 180 | None => { 181 | self.children.insert(char, rhs_next_node); 182 | } 183 | } 184 | } 185 | } 186 | } 187 | 188 | impl PartialEq for TrieDatalessNode { 189 | fn eq(&self, other: &Self) -> bool { 190 | // If keys aren't equal, nodes aren't equal. 191 | if !(self.children.len() == other.children.len() 192 | && self.children.keys().all(|k| other.children.contains_key(k))) 193 | { 194 | return false; 195 | } 196 | 197 | // If the node on one trie is a word end, and on the other it isn't, two nodes aren't equal. 198 | if self.word_end != other.word_end { 199 | return false; 200 | } 201 | 202 | // Every child node that has the same key (character) must be equal. 
203 | self.children 204 | .iter() 205 | .map(|(char, self_child)| (self_child, other.children.get(char).unwrap())) 206 | .all(|(self_child, other_child)| other_child == self_child) 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /tests/complete_test_data.rs: -------------------------------------------------------------------------------- 1 | use basic_trie::DataTrie; 2 | use growable_bloom_filter::GrowableBloom; 3 | use peak_alloc::PeakAlloc; 4 | use randomizer::Randomizer; 5 | use std::collections::HashMap; 6 | use std::collections::HashSet; 7 | use std::time::Instant; 8 | use unicode_segmentation::UnicodeSegmentation; 9 | 10 | #[global_allocator] 11 | static PEAK_ALLOC: PeakAlloc = PeakAlloc; 12 | 13 | pub struct BigData { 14 | pub number_of_words: usize, 15 | pub first_letter_histogram: HashMap, 16 | pub big_data: Vec, 17 | } 18 | 19 | pub fn generate_random_lines(x: usize, y: usize) -> BigData { 20 | let mut bloom_filter = GrowableBloom::new(0.01, x); 21 | let mut result = Vec::new(); 22 | let mut first_letter_histogram = HashMap::::new(); 23 | let mut number_of_words = 0; 24 | 25 | while number_of_words != x { 26 | let random_string = Randomizer::ALPHABETICAL_LOWER(y).string().unwrap(); 27 | if !bloom_filter.contains(&random_string) { 28 | bloom_filter.insert(&random_string); 29 | 30 | let first_letter = random_string[0..1].to_string(); 31 | *first_letter_histogram.entry(first_letter).or_insert(0) += 1; 32 | 33 | result.push(random_string); 34 | number_of_words += 1; 35 | } 36 | } 37 | 38 | BigData { 39 | number_of_words, 40 | first_letter_histogram, 41 | big_data: result, 42 | } 43 | } 44 | 45 | #[test] 46 | fn overall_data() { 47 | let number_of_words = 500_000; 48 | let word_length = 15; 49 | 50 | let big_data = generate_random_lines(number_of_words, word_length); 51 | 52 | println!( 53 | "Memory usage after loading words: {:.1}mb\n", 54 | PEAK_ALLOC.current_usage_as_mb() 55 | ); 56 | 57 | let mut 
// Fills a data trie with random words (each carrying the value 0), clears it
// and checks that it reports itself as empty. The memory and timing figures are
// only printed for manual inspection; nothing is asserted about them.
#[test]
fn clearing_data() {
    let number_of_words = 500_000;
    let word_length = 15;

    let big_data = generate_random_lines(number_of_words, word_length);

    println!(
        "Memory usage after loading words: {:.1}mb\n",
        PEAK_ALLOC.current_usage_as_mb()
    );

    let mut data_trie = DataTrie::new();

    // Iterating by value consumes the word list, so each word is dropped right
    // after it was inserted and does not count towards the later readings.
    for word in big_data.big_data {
        data_trie.insert(&word, 0);
    }

    println!(
        "Memory usage after data trie generation: {:.1}mb\n",
        PEAK_ALLOC.current_usage_as_mb()
    );

    // Only the clear and the emptiness check are timed.
    let now = Instant::now();

    data_trie.clear();
    assert!(data_trie.is_empty());

    let elapsed = now.elapsed();
    println!("Operations time: {:.2?}\n", elapsed);

    println!(
        "Memory usage after data trie cleanup: {:.1}mb\n",
        PEAK_ALLOC.current_usage_as_mb()
    );
}
// Checks the `+` operator on data tries: two tries built from the two halves of
// the word list must add up to a trie that equals one built from the whole list.
// Every word carries itself (as `&str`) as its data.
// The memory and timing figures are only printed, never asserted.
#[test]
fn add_op_data_2() {
    let number_of_words = 100_000;
    let word_length = 15;

    let big_data = generate_random_lines(number_of_words, word_length);

    let mut data_trie_0 = DataTrie::new();
    let mut data_trie_1 = DataTrie::new();
    let mut data_trie_2 = DataTrie::new();

    println!(
        "Memory usage after loading words: {:.1}mb\n",
        PEAK_ALLOC.current_usage_as_mb()
    );

    // Reference trie that holds every generated word.
    for line in big_data.big_data.iter() {
        data_trie_0.insert(&line, line.as_str());
    }

    // Reversing before skipping drops the LAST half of the list, so
    // `data_trie_1` holds the first half.
    for line in big_data.big_data.iter().rev().skip(number_of_words / 2) {
        data_trie_1.insert(&line, line.as_str());
    }

    // `data_trie_2` holds the second half.
    for line in big_data.big_data.iter().skip(number_of_words / 2) {
        data_trie_2.insert(&line, line.as_str());
    }

    println!(
        "Memory usage after data trie generation: {:.1}mb",
        PEAK_ALLOC.current_usage_as_mb()
    );

    // Only the addition itself is timed.
    let now = Instant::now();

    let data_trie_3 = data_trie_1 + data_trie_2;
    let elapsed = now.elapsed();
    println!("Operations time: {:.2?}", elapsed);

    let t3_words = data_trie_3.get_all();
    let correct_words = data_trie_0.get_all();

    // No word of the reference trie may be missing from the sum.
    let item_set: HashSet<_> = t3_words.iter().collect();
    let only_in_correct: Vec<_> = correct_words
        .into_iter()
        .filter(|item| !item_set.contains(item))
        .collect();

    assert_eq!(only_in_correct.len(), 0);
    assert!(data_trie_0 == data_trie_3);

    println!(
        "Memory usage after data trie addition: {:.1}mb\n",
        PEAK_ALLOC.current_usage_as_mb()
    );
}
println!( 279 | "Memory usage after data trie generation: {:.1}mb", 280 | PEAK_ALLOC.current_usage_as_mb() 281 | ); 282 | 283 | let now = Instant::now(); 284 | 285 | let result = data_trie_1 == data_trie_2; 286 | 287 | let elapsed = now.elapsed(); 288 | println!("Operations time: {:.2?}", elapsed); 289 | 290 | assert!(result); 291 | 292 | println!( 293 | "Memory usage after data trie addition: {:.1}mb\n", 294 | PEAK_ALLOC.current_usage_as_mb() 295 | ); 296 | } 297 | -------------------------------------------------------------------------------- /src/trie_node/data_node.rs: -------------------------------------------------------------------------------- 1 | use fxhash::FxHashMap; 2 | use std::cmp::Ordering; 3 | use std::ops; 4 | use thin_vec::ThinVec; 5 | 6 | #[cfg(feature = "serde")] 7 | use serde_crate::{Deserialize, Serialize}; 8 | 9 | type WordEnd = Option>; 10 | 11 | /// Helper struct for returning multiple values for deleting data. 12 | /// It is needed because the 'must_keep' value will at some point change 13 | /// from false to true, but the data stays the same from the beginning of 14 | /// unwinding. 15 | pub(crate) struct RemoveData { 16 | must_keep: bool, 17 | pub(crate) data: WordEnd, 18 | } 19 | 20 | /// Singular trie node that represents its children and a marker for word ending. 21 | #[derive(Debug, Default, Clone)] 22 | #[cfg_attr( 23 | feature = "serde", 24 | derive(Serialize, Deserialize), 25 | serde(crate = "serde_crate") 26 | )] 27 | pub struct TrieDataNode { 28 | #[cfg_attr(feature = "serde", serde(rename = "c"))] 29 | pub(crate) children: Box, TrieDataNode>>, 30 | #[cfg_attr(feature = "serde", serde(rename = "wed"))] 31 | word_end_data: WordEnd, 32 | } 33 | 34 | /// Methods only on nodes that have data. 35 | impl TrieDataNode { 36 | /// Returns a new instance of a TrieNode. 
37 | pub(crate) fn new() -> Self { 38 | TrieDataNode { 39 | children: Default::default(), 40 | word_end_data: None, 41 | } 42 | } 43 | 44 | /// Recursive function that drops all children maps and collects data 45 | /// regardless of having multiple words branching from them or not. 46 | pub(crate) fn remove_all_words_collect(&mut self, found_data: &mut Vec) -> usize { 47 | let num_removed = self 48 | .children 49 | .values_mut() 50 | .map(|child| child.remove_all_words_collect(found_data)) 51 | .sum::() 52 | + self.is_associated() as usize; 53 | 54 | if let Some(data_vec) = self.disassociate() { 55 | found_data.extend(data_vec); 56 | } 57 | 58 | self.clear_children(); 59 | 60 | num_removed 61 | } 62 | 63 | /// Recursive function that counts the number of words from a starting node. 64 | pub(crate) fn count_words(&self) -> usize { 65 | self.children 66 | .values() 67 | .map(|child| child.count_words()) 68 | .sum::() 69 | + self.is_associated() as usize 70 | } 71 | 72 | /// Recursive function finds every node that is an end of a word and appends 73 | /// its data as references to the passed vector. 74 | pub(crate) fn generate_all_data<'a>(&'a self, found_data: &mut Vec<&'a D>) { 75 | if let Some(data_vec) = &self.word_end_data { 76 | found_data.extend(data_vec.iter()); 77 | } 78 | 79 | self.children 80 | .values() 81 | .for_each(|x| x.generate_all_data(found_data)); 82 | } 83 | 84 | /// Recursive function finds every node that is an end of a word and appends 85 | /// its data as mutable references to the passed vector. 86 | pub(crate) fn generate_all_data_mut<'a>(&'a mut self, found_data: &mut Vec<&'a mut D>) { 87 | if let Some(data_vec) = &mut self.word_end_data { 88 | found_data.extend(data_vec.iter_mut()); 89 | } 90 | 91 | self.children 92 | .values_mut() 93 | .for_each(|x| x.generate_all_data_mut(found_data)); 94 | } 95 | 96 | /// Function pushes data to the association vector. 
97 | pub(crate) fn push_data(&mut self, data: D) { 98 | self.get_association_mut().as_mut().unwrap().push(data); 99 | } 100 | 101 | /// Recursive function for inserting found words from the given node and 102 | /// given starting substring. 103 | pub(crate) fn find_words(&self, substring: &str, found_words: &mut Vec) { 104 | if self.is_associated() { 105 | found_words.push(substring.to_string()); 106 | } 107 | 108 | self.children.iter().for_each(|(character, node)| { 109 | node.find_words(&(substring.to_owned() + character), found_words) 110 | }); 111 | } 112 | 113 | /// The recursive function for finding a vector of shortest and longest words in the TrieNode consists of: 114 | /// - the DFS tree traversal part for getting to every child node; 115 | /// - matching lengths of found words in combination with the passed ordering. 116 | pub(crate) fn words_min_max( 117 | &self, 118 | substring: &str, 119 | found_words: &mut Vec, 120 | ord: Ordering, 121 | ) { 122 | 'word: { 123 | if self.is_associated() { 124 | if let Some(found) = found_words.first() { 125 | match substring.len().cmp(&found.len()) { 126 | Ordering::Less if ord == Ordering::Less => { 127 | found_words.clear(); 128 | } 129 | Ordering::Greater if ord == Ordering::Greater => { 130 | found_words.clear(); 131 | } 132 | Ordering::Equal => (), 133 | _ => break 'word, 134 | } 135 | } 136 | found_words.push(substring.to_string()); 137 | } 138 | } 139 | 140 | self.children.iter().for_each(|(character, node)| { 141 | node.words_min_max(&(substring.to_owned() + character), found_words, ord) 142 | }); 143 | } 144 | 145 | /// Function resets the association of a word and returns the 146 | /// previous association. If 'keep_word' is true, the association is only 147 | /// reset. 
148 | pub(crate) fn clear_word_end_association(&mut self, keep_word: bool) -> WordEnd { 149 | let return_data = self.disassociate(); 150 | 151 | if keep_word && return_data.is_some() { 152 | self.associate(); 153 | } 154 | 155 | return_data 156 | } 157 | 158 | /// Recursive function for removing and freeing memory of a word that is not needed anymore. 159 | /// The algorithm first finds the last node of a word given in the form of a character iterator, 160 | /// then it frees the maps and unwinds to the first node that should not be deleted. 161 | /// The first node that should not be deleted is either: 162 | /// - the root node 163 | /// - the node that has multiple words branching from it 164 | /// - the node that represents an end to some word with the same prefix 165 | /// The last node's data is propagated all the way to the final return 166 | /// with the help of auxiliary 'RemoveData' struct. 167 | pub(crate) fn remove_one_word<'b>( 168 | &mut self, 169 | mut characters: impl Iterator, 170 | ) -> RemoveData { 171 | let next_character = match characters.next() { 172 | None => { 173 | return RemoveData { 174 | must_keep: false, 175 | data: self.disassociate(), 176 | } 177 | } 178 | Some(char) => char, 179 | }; 180 | 181 | let next_node = self.children.get_mut(next_character).unwrap(); 182 | let must_keep = next_node.remove_one_word(characters); 183 | 184 | if self.children.len() > 1 || must_keep.must_keep { 185 | return RemoveData { 186 | must_keep: true, 187 | data: must_keep.data, 188 | }; 189 | } 190 | self.clear_children(); 191 | 192 | RemoveData { 193 | must_keep: self.is_associated(), 194 | data: must_keep.data, 195 | } 196 | } 197 | 198 | /// Function marks the node as an end of a word. 199 | pub(crate) fn associate(&mut self) { 200 | self.word_end_data = Some(ThinVec::new()); 201 | } 202 | 203 | /// Function unmarks the node as an end of a word and returns the data. 
pub(crate) fn disassociate(&mut self) -> WordEnd<D> {
    self.word_end_data.take()
}

/// Function returns true if an association is found for the word.
pub(crate) fn is_associated(&self) -> bool {
    self.word_end_data.is_some()
}

/// Function returns the node association.
pub(crate) fn get_association(&self) -> &WordEnd<D> {
    &self.word_end_data
}

/// Function returns the mutable node association.
pub(crate) fn get_association_mut(&mut self) -> &mut WordEnd<D> {
    &mut self.word_end_data
}

/// Function removes all children of a node.
pub(crate) fn clear_children(&mut self) {
    self.children = Default::default();
}
}

impl<D> ops::AddAssign for TrieDataNode<D> {
    /// Overriding the += operator on nodes.
    ///
    /// The association of 'rhs' is merged into self first: if both nodes end
    /// a word the data of 'rhs' is appended to self's data, and if only 'rhs'
    /// ends a word its data vector is moved into self.
    /// Example: when adding 'word' to 'word1', 'd' on 'word1' needs to be associated.
    ///
    /// Then, for every child node and character in the 'rhs' node:
    /// - if the self node doesn't have that character in its children map,
    ///   the whole 'rhs' child is moved into self's children map without any extra cost;
    /// - if the self node has that character, the 'rhs' child is added
    ///   (recursively) to self's child of that character.
    fn add_assign(&mut self, mut rhs: Self) {
        // Merging here (instead of only on the children) also covers the node
        // the operation was started on, e.g. a root node that ends the empty word.
        if let Some(data_vec_rhs) = rhs.word_end_data.take() {
            match &mut self.word_end_data {
                Some(data_vec_self) => data_vec_self.extend(data_vec_rhs),
                None => self.word_end_data = Some(data_vec_rhs),
            }
        }

        for (character, rhs_next_node) in rhs.children.into_iter() {
            match self.children.get_mut(&*character) {
                // Both nodes have the character: merge the two subtrees.
                Some(self_next_node) => *self_next_node += rhs_next_node,
                // Self doesn't contain the character, no conflict arises.
                // The whole 'rhs' node is just moved from 'rhs' into self.
                None => {
                    self.children.insert(character, rhs_next_node);
                }
            }
        }
    }
}

/// Returns true if both slices hold the same elements with the same
/// multiplicities, in any order.
fn same_elements<T: PartialEq>(left: &[T], right: &[T]) -> bool {
    if left.len() != right.len() {
        return false;
    }

    // Every element of 'right' may be paired with at most one element of 'left'.
    let mut paired = vec![false; right.len()];

    left.iter().all(|left_item| {
        right.iter().enumerate().any(|(index, right_item)| {
            let is_match = !paired[index] && left_item == right_item;
            if is_match {
                paired[index] = true;
            }
            is_match
        })
    })
}

impl<D: PartialEq> PartialEq for TrieDataNode<D> {
    /// Operation == can be applied only to TrieNodes whose data implements PartialEq.
    ///
    /// Two nodes are equal when they have the same child characters, the same
    /// association (data is compared regardless of order, duplicates included)
    /// and all children of the same character are equal.
    fn eq(&self, other: &Self) -> bool {
        // Different number of keys means the nodes can't be equal.
        if self.children.len() != other.children.len() {
            return false;
        }

        // If associations aren't equal, two nodes aren't equal.
        let same_association = match (&self.word_end_data, &other.word_end_data) {
            (Some(self_vec), Some(other_vec)) => same_elements(self_vec, other_vec),
            (None, None) => true,
            _ => false,
        };
        if !same_association {
            return false;
        }

        // Every key of self must exist in other (with equal key counts this
        // makes the key sets identical) and the matching children must be equal.
        self.children.iter().all(|(character, self_child)| {
            other
                .children
                .get(character)
                .map_or(false, |other_child| other_child == self_child)
        })
    }
}
--------------------------------------------------------------------------------
/src/trie/regular_trie.rs:
--------------------------------------------------------------------------------
use std::cmp::Ordering;
use std::ops;

use arrayvec::ArrayString;
#[cfg(feature = "serde")]
use serde_crate::{Deserialize, Serialize};

use crate::trie::get_characters;
use crate::trie_node::TrieDatalessNode;

/// Trie that stores words without any data attached to them.
#[derive(Debug, Default, Clone)]
#[cfg_attr(
    feature = "serde",
    derive(Serialize, Deserialize),
    serde(crate = "serde_crate")
)]
pub struct Trie {
    root: TrieDatalessNode,
    len: usize,
}

impl Trie {
    /// Returns a new, empty instance of the trie.
    pub fn new() -> Self {
        Trie {
            root: TrieDatalessNode::new(),
            len: 0,
        }
    }

    /// Insert a word into the trie, with no corresponding data.
    /// Inserting a word that is already present doesn't change the word count.
    ///
    /// # Panics
    ///
    /// Panics if a character of 'word' doesn't fit into the fixed-size key
    /// used by the nodes.
    ///
    /// # Examples
    ///
    /// ```
    /// use basic_trie::Trie;
    /// let mut trie = Trie::new();
    ///
    /// trie.insert("word1");
    /// assert_eq!(vec![String::from("word1")], trie.get_all());
    /// ```
    pub fn insert(&mut self, word: &str) {
        let mut current = &mut self.root;

        for character in get_characters(word) {
            let key = ArrayString::from(character)
                .expect("a single character must fit into the node key");
            current = current.children.entry(key).or_default();
        }

        // Count the word only the first time it is inserted.
        if !current.is_associated() {
            self.len += 1;
        }

        current.associate();
    }

    /// Removes a word from the trie.
    /// If the word is a prefix to some word, some word
    /// isn't removed from the trie.
62 | /// 63 | /// # Examples 64 | /// 65 | /// ``` 66 | /// use basic_trie::Trie; 67 | /// let mut trie = Trie::new(); 68 | /// 69 | /// trie.insert("word"); 70 | /// trie.insert("wording"); 71 | /// 72 | /// trie.remove("word"); 73 | /// assert_eq!(vec![String::from("wording")], trie.get("word").unwrap()); 74 | /// 75 | /// trie.remove("wording"); 76 | /// assert_eq!(Vec::::new(), trie.get_all()); 77 | /// ``` 78 | pub fn remove(&mut self, word: &str) { 79 | let Some(current) = self.get_final_node_mut(word) else { 80 | return; 81 | }; 82 | 83 | let characters = get_characters(word); 84 | 85 | if !current.children.is_empty() { 86 | return if current.is_associated() { 87 | current.disassociate(); 88 | self.len -= 1; 89 | }; 90 | } 91 | 92 | self.root.remove_one_word(characters.into_iter()); 93 | self.len -= 1; 94 | } 95 | 96 | /// Removes every word that begins with 'prefix'. 97 | /// Not including the word 'prefix' if it's present. 98 | /// 99 | /// # Examples 100 | /// 101 | /// ``` 102 | /// use basic_trie::Trie; 103 | /// let mut trie = Trie::new(); 104 | /// 105 | /// trie.insert("eat"); 106 | /// trie.insert("eats"); 107 | /// trie.insert("eating"); 108 | /// trie.insert("eatings"); 109 | /// trie.insert("ea"); 110 | /// 111 | /// trie.remove_prefix("ea"); 112 | /// 113 | /// assert_eq!(vec![String::from("ea")], trie.get_all()); 114 | /// ``` 115 | pub fn remove_prefix(&mut self, prefix: &str) { 116 | let Some(current) = self.get_final_node_mut(prefix) else { 117 | return; 118 | }; 119 | 120 | // (current.is_associated() as usize) is added (subtracted twice) to 121 | // not remove the current word from the count. Literal '1' is not used 122 | // because of calling this function on the root node where 1 should 123 | // not be added. 124 | self.len -= current.remove_all_words() - (current.is_associated() as usize); 125 | } 126 | 127 | /// Returns an option enum with a vector of owned strings 128 | /// representing all found words that begin with 'query'. 
129 | /// If the word 'query' doesn't exist, None is returned. 130 | /// 131 | /// # Examples 132 | /// 133 | /// ``` 134 | /// use basic_trie::Trie; 135 | /// let mut trie = Trie::new(); 136 | /// 137 | /// trie.insert("word1"); 138 | /// trie.insert("word2"); 139 | /// 140 | /// let all_correct_words = vec![String::from("word1"), String::from("word2")]; 141 | /// let mut found_words = trie.get("word").unwrap(); 142 | /// found_words.sort(); 143 | /// assert_eq!(all_correct_words, found_words); 144 | /// ``` 145 | pub fn get(&self, query: &str) -> Option> { 146 | let mut substring = String::new(); 147 | let mut current_node = &self.root; 148 | let characters = get_characters(query); 149 | 150 | for character in characters { 151 | current_node = match current_node.children.get(character) { 152 | None => return None, 153 | Some(trie_node) => { 154 | substring.push_str(character); 155 | trie_node 156 | } 157 | } 158 | } 159 | 160 | let mut words_vec = Vec::new(); 161 | current_node.find_words(&substring, &mut words_vec); 162 | 163 | Some(words_vec) 164 | } 165 | 166 | /// Returns the vector of longest words found in the trie. 167 | /// 168 | /// # Examples 169 | /// 170 | /// ``` 171 | /// use basic_trie::Trie; 172 | /// let mut trie = Trie::new(); 173 | /// 174 | /// trie.insert("shortwrd"); 175 | /// trie.insert("verylongword"); 176 | /// trie.insert("somelongword"); 177 | /// 178 | /// let longest_words = vec![String::from("somelongword"), String::from("verylongword")]; 179 | /// let mut found_words = trie.get_longest(); 180 | /// found_words.sort(); 181 | /// assert_eq!(longest_words, found_words); 182 | /// ``` 183 | pub fn get_longest(&self) -> Vec { 184 | let mut words = Vec::new(); 185 | self.root.words_min_max("", &mut words, Ordering::Greater); 186 | words 187 | } 188 | 189 | /// Returns the vector of shortest words found in the trie. 
190 | /// 191 | /// # Examples 192 | /// 193 | /// ``` 194 | /// use basic_trie::Trie; 195 | /// let mut trie = Trie::new(); 196 | /// 197 | /// trie.insert("shortwrd"); 198 | /// trie.insert("rlyshort"); 199 | /// trie.insert("verylongword"); 200 | /// 201 | /// let shortest_word = vec![String::from("rlyshort"), String::from("shortwrd")]; 202 | /// let mut found_words = trie.get_shortest(); 203 | /// found_words.sort(); 204 | /// assert_eq!(shortest_word, found_words); 205 | /// ``` 206 | pub fn get_shortest(&self) -> Vec { 207 | let mut words = Vec::new(); 208 | self.root.words_min_max("", &mut words, Ordering::Less); 209 | words 210 | } 211 | 212 | /// Returns the number of words in the trie. 213 | /// 214 | /// # Examples 215 | /// 216 | /// ``` 217 | /// use basic_trie::Trie; 218 | /// let mut trie = Trie::new(); 219 | /// 220 | /// trie.insert("word1"); 221 | /// trie.insert("word2"); 222 | /// trie.insert("word3"); 223 | /// trie.insert("word4"); 224 | /// assert_eq!(4, trie.len()); 225 | /// 226 | /// trie.remove("word1"); 227 | /// assert_eq!(3, trie.len()); 228 | /// 229 | /// trie.remove_prefix("w"); 230 | /// assert_eq!(0, trie.len()); 231 | /// ``` 232 | pub fn len(&self) -> usize { 233 | self.len 234 | } 235 | 236 | /// Returns the number of words that start with 'prefix'. 237 | /// If the sequence 'prefix' is not found, None is returned. 
238 | /// 239 | /// # Examples 240 | /// ``` 241 | /// use basic_trie::Trie; 242 | /// let mut trie = Trie::new(); 243 | /// 244 | /// trie.insert("word1"); 245 | /// trie.insert("word2"); 246 | /// trie.insert("word3"); 247 | /// trie.insert("word4"); 248 | /// trie.insert("word"); 249 | /// assert_eq!(4, trie.len_prefix("word")); 250 | /// ``` 251 | pub fn len_prefix(&self, prefix: &str) -> usize { 252 | match self.get_final_node(prefix) { 253 | None => 0, 254 | Some(node) => node.count_words() - node.is_associated() as usize, 255 | } 256 | } 257 | 258 | /// Returns an option enum with a vector of owned strings 259 | /// representing all words in the trie. 260 | /// Order is not guaranteed. 261 | /// 262 | /// # Examples 263 | /// 264 | /// ``` 265 | /// use basic_trie::Trie; 266 | /// let mut trie = Trie::new(); 267 | /// 268 | /// trie.insert("word1"); 269 | /// trie.insert("word2"); 270 | /// trie.insert("word3"); 271 | /// trie.insert("word4"); 272 | /// trie.insert("word5"); 273 | /// 274 | /// let all_words = vec![ 275 | /// String::from("word1"), String::from("word2"), String::from("word3"), 276 | /// String::from("word4"), String::from("word5") 277 | /// ]; 278 | /// 279 | /// let mut found_words = trie.get_all(); 280 | /// found_words.sort(); 281 | /// 282 | /// assert_eq!(all_words, found_words); 283 | /// ``` 284 | pub fn get_all(&self) -> Vec { 285 | self.get("").unwrap() 286 | } 287 | 288 | /// Returns true if the trie contains 'query' as a word. 289 | /// 290 | /// # Examples 291 | /// 292 | /// ``` 293 | /// use basic_trie::Trie; 294 | /// let mut trie = Trie::new(); 295 | /// 296 | /// trie.insert("word"); 297 | /// assert!(trie.contains("word")); 298 | /// assert!(!trie.contains("notfound")); 299 | /// ``` 300 | pub fn contains(&self, query: &str) -> bool { 301 | self.get_final_node(query) 302 | .map_or(false, |node| node.is_associated()) 303 | } 304 | 305 | /// Returns true if no words are in the trie. 
306 | /// 307 | /// # Examples 308 | /// 309 | /// ``` 310 | /// use basic_trie::Trie; 311 | /// let mut trie = Trie::new(); 312 | /// 313 | /// trie.insert("word"); 314 | /// trie.remove("word"); 315 | /// 316 | /// assert!(trie.is_empty()); 317 | /// ``` 318 | pub fn is_empty(&self) -> bool { 319 | self.len == 0 320 | } 321 | 322 | /// Removes all words from the trie. 323 | /// 324 | /// # Examples 325 | /// 326 | /// ``` 327 | /// use basic_trie::Trie; 328 | /// let mut trie = Trie::new(); 329 | /// 330 | /// trie.insert("word1"); 331 | /// trie.insert("word2"); 332 | /// trie.insert("word3"); 333 | /// trie.insert("word4"); 334 | /// 335 | /// trie.clear(); 336 | /// assert!(trie.is_empty()); 337 | /// assert_eq!(0, trie.len()); 338 | /// ``` 339 | pub fn clear(&mut self) { 340 | self.root.clear_children(); 341 | self.len = 0; 342 | } 343 | 344 | /// Function for getting the last node in a character sequence. 345 | fn get_final_node(&self, query: &str) -> Option<&TrieDatalessNode> { 346 | let mut current = &self.root; 347 | 348 | for character in get_characters(query) { 349 | current = match current.children.get(character) { 350 | None => return None, 351 | Some(next_node) => next_node, 352 | } 353 | } 354 | 355 | Some(current) 356 | } 357 | 358 | /// Function for getting the last node in a character sequence (mutable). 359 | fn get_final_node_mut(&mut self, query: &str) -> Option<&mut TrieDatalessNode> { 360 | let mut current = &mut self.root; 361 | 362 | for character in get_characters(query) { 363 | current = match current.children.get_mut(character) { 364 | None => return None, 365 | Some(next_node) => next_node, 366 | } 367 | } 368 | 369 | Some(current) 370 | } 371 | } 372 | 373 | impl ops::Add for Trie { 374 | type Output = Trie; 375 | 376 | /// Operation + merges two tries, leaving out duplicate words. 377 | /// The smaller trie is always added to the larger one for efficiency. 
///
/// # Examples
///
/// ```
/// use basic_trie::Trie;
/// let mut trie_1 = Trie::new();
/// trie_1.insert("word1");
/// trie_1.insert("word2");
/// trie_1.insert("word");
///
/// let mut trie_2 = Trie::new();
/// trie_2.insert("word3");
/// trie_2.insert("word");
///
/// let mut correct = Trie::new();
/// correct.insert("word");
/// correct.insert("word1");
/// correct.insert("word2");
/// correct.insert("word3");
///
/// let trie_3 = trie_1 + trie_2;
///
/// assert_eq!(trie_3, correct);
/// ```
fn add(self, rhs: Self) -> Self::Output {
    let (smaller, mut bigger) = if self.len < rhs.len {
        (self, rhs)
    } else {
        (rhs, self)
    };

    // The empty word is marked on the root node itself. Carry that marker
    // over explicitly so it survives no matter how the node-level '+='
    // treats the node the merge is started on.
    if smaller.root.is_associated() {
        bigger.root.associate();
    }

    bigger.root += smaller.root;

    // Number of words needs to be recalculated.
    bigger.len = bigger.root.count_words();

    bigger
}
}

impl ops::AddAssign for Trie {
    /// Operation += merges two tries, leaving out duplicate words.
    ///
    /// # Examples
    ///
    /// ```
    /// use basic_trie::Trie;
    /// let mut trie_1 = Trie::new();
    /// trie_1.insert("word1");
    /// trie_1.insert("word2");
    /// trie_1.insert("word");
    ///
    /// let mut trie_2 = Trie::new();
    /// trie_2.insert("word3");
    /// trie_2.insert("word");
    ///
    /// let mut correct = Trie::new();
    /// correct.insert("word");
    /// correct.insert("word1");
    /// correct.insert("word2");
    /// correct.insert("word3");
    ///
    /// trie_1 += trie_2;
    ///
    /// assert_eq!(trie_1, correct);
    /// ```
    fn add_assign(&mut self, rhs: Self) {
        // Keep the empty word of 'rhs' (marked on its root node), see 'add'.
        if rhs.root.is_associated() {
            self.root.associate();
        }

        self.root += rhs.root;

        // Number of words needs to be recalculated.
        self.len = self.root.count_words();
    }
}

impl PartialEq for Trie {
    /// Two tries are equal when they hold the same number of words and
    /// their root nodes are equal.
    ///
    /// # Examples
    ///
    /// ```
    /// use basic_trie::Trie;
    /// let mut trie_1 = Trie::new();
    /// trie_1.insert("test");
    ///
    /// let mut trie_2 = Trie::new();
    /// trie_2.insert("test");
    ///
    /// assert_eq!(trie_1, trie_2);
    ///
    /// trie_2.insert("test2");
    ///
    /// assert_ne!(trie_1, trie_2);
    /// ```
    fn eq(&self, other: &Self) -> bool {
        // The cheap length comparison short-circuits the recursive node comparison.
        self.len == other.len && self.root == other.root
    }
}
--------------------------------------------------------------------------------
/src/trie/data_trie.rs:
--------------------------------------------------------------------------------
use crate::trie::get_characters;
use crate::trie_node::TrieDataNode;
use arrayvec::ArrayString;
use std::cmp::Ordering;
use std::ops;

#[cfg(feature = "serde")]
use serde_crate::{Deserialize, Serialize};

/// Trie that stores words together with data of type `D` attached to them.
#[derive(Debug, Default, Clone)]
#[cfg_attr(
    feature = "serde",
    derive(Serialize, Deserialize),
    serde(crate = "serde_crate")
)]
pub struct DataTrie<D> {
    root: TrieDataNode<D>,
    len: usize,
}

impl<D> DataTrie<D> {
    /// Returns a new instance of the trie.
    pub fn new() -> Self {
        DataTrie {
            root: TrieDataNode::new(),
            len: 0,
        }
    }

    /// Insert a word into the trie, with the corresponding data.
31 | /// 32 | /// # Examples 33 | /// 34 | /// ``` 35 | /// use basic_trie::DataTrie; 36 | /// let mut trie = DataTrie::new(); 37 | /// 38 | /// trie.insert("word1", "somedata"); 39 | /// assert_eq!(vec![String::from("word1")], trie.get_all()); 40 | /// ``` 41 | pub fn insert(&mut self, word: &str, associated_data: D) { 42 | let characters = get_characters(word); 43 | let mut current = &mut self.root; 44 | 45 | for character in characters { 46 | current = current 47 | .children 48 | .entry(ArrayString::from(character).unwrap()) 49 | .or_insert_with(TrieDataNode::new); 50 | } 51 | 52 | if !current.is_associated() { 53 | self.len += 1; 54 | current.associate(); 55 | } 56 | 57 | current.push_data(associated_data); 58 | } 59 | 60 | /// Insert a word into the trie, with no corresponding data. 61 | /// This function is very different from inserting a word into 62 | /// a regular trie, since it enables later attachment of data 63 | /// onto the inserted word. Type of trie must be annotated if 64 | /// this is the first function call. 65 | /// 66 | /// # Examples 67 | /// 68 | /// ``` 69 | /// use basic_trie::DataTrie; 70 | /// let mut trie = DataTrie::<&str>::new(); 71 | /// 72 | /// trie.insert_no_data("word1"); 73 | /// assert_eq!(vec![String::from("word1")], trie.get_all()); 74 | /// 75 | /// trie.insert("word1", "somedata"); 76 | /// assert_eq!(vec![&"somedata"], trie.get_data("word1", false).unwrap()); 77 | /// ``` 78 | pub fn insert_no_data(&mut self, word: &str) { 79 | let characters = get_characters(word); 80 | let mut current = &mut self.root; 81 | 82 | for character in characters { 83 | current = current 84 | .children 85 | .entry(ArrayString::from(character).unwrap()) 86 | .or_insert_with(TrieDataNode::new); 87 | } 88 | 89 | if !current.is_associated() { 90 | self.len += 1; 91 | current.associate(); 92 | } 93 | } 94 | 95 | /// Removes a word from the trie and returns data associated with that word. 
96 | /// If the word is a prefix to some word, some word isn't removed from the trie. 97 | /// If the word is not found, None is returned. 98 | /// 99 | /// # Examples 100 | /// 101 | /// ``` 102 | /// use basic_trie::DataTrie; 103 | /// let mut trie = DataTrie::new(); 104 | /// 105 | /// trie.insert("word", "somedata"); 106 | /// trie.insert("wording", "somedata2"); 107 | /// 108 | /// let removed_data1 = trie.remove("word"); 109 | /// assert_eq!(vec![String::from("wording")], trie.get("word").unwrap()); 110 | /// assert_eq!(vec![&"somedata2"], trie.get_data("word", true).unwrap()); 111 | /// assert_eq!(vec!["somedata"], removed_data1.unwrap()); 112 | /// 113 | /// let removed_data2 = trie.remove("wording"); 114 | /// assert_eq!(Vec::::new(), trie.get_all()); 115 | /// assert_eq!(vec!["somedata2"], removed_data2.unwrap()); 116 | /// ``` 117 | pub fn remove(&mut self, word: &str) -> Option> { 118 | let current = self.get_final_node_mut(word)?; 119 | 120 | if !current.children.is_empty() { 121 | return current.clear_word_end_association(false).map(|data_vec| { 122 | self.len -= 1; 123 | data_vec.into_iter().collect() 124 | }); 125 | } 126 | 127 | let characters = get_characters(word); 128 | 129 | self.root 130 | .remove_one_word(characters.into_iter()) 131 | .data 132 | .map_or(Some(Vec::new()), |data_vec| { 133 | self.len -= 1; 134 | Some(data_vec.into_iter().collect()) 135 | }) 136 | } 137 | 138 | /// Removes every word that begins with 'prefix' and collects all removed data. 139 | /// Not including the word 'prefix' if it's present. 140 | /// If the sequence 'prefix' is not found, None is returned. 
141 | /// 142 | /// # Examples 143 | /// 144 | /// ``` 145 | /// use basic_trie::DataTrie; 146 | /// let mut trie = DataTrie::new(); 147 | /// 148 | /// trie.insert("eat", "somedata"); 149 | /// trie.insert("eats", "somedata2"); 150 | /// trie.insert("eating", "somedata3"); 151 | /// trie.insert("eatings", "somedata4"); 152 | /// trie.insert("ea", "somedata5"); 153 | /// 154 | /// let mut removed_data = trie.remove_prefix("ea").unwrap(); 155 | /// removed_data.sort(); 156 | /// 157 | /// assert_eq!(vec![String::from("ea")], trie.get_all()); 158 | /// assert_eq!(vec!["somedata", "somedata2", "somedata3", "somedata4"], removed_data); 159 | /// ``` 160 | pub fn remove_prefix(&mut self, prefix: &str) -> Option> { 161 | let current = self.get_final_node_mut(prefix)?; 162 | 163 | let mut data_vec = Vec::new(); 164 | 165 | // Sum must be applied to the node's children and not to the node 166 | // itself because the recursive function must disassociate a node 167 | // to put its data in the vector. The optimization of adding one 168 | // to the count when the node in question isn't root can't be used 169 | // since the original node would've been already disassociated therefore 170 | // not accounted for in self.len. 171 | let word_count = current 172 | .children 173 | .values_mut() 174 | .map(|child| child.remove_all_words_collect(&mut data_vec)) 175 | .sum::(); 176 | current.clear_children(); 177 | 178 | self.len -= word_count; 179 | 180 | Some(data_vec) 181 | } 182 | 183 | /// Returns a vector of references to data of some word or references 184 | /// to all found data of some word prefix when 'soft_match' is set to true. 185 | /// If the word is not found and 'soft_match' is set to false, None is returned. 
186 | /// 187 | /// # Examples 188 | /// 189 | /// ``` 190 | /// use basic_trie::DataTrie; 191 | /// let mut trie = DataTrie::new(); 192 | /// 193 | /// trie.insert("word1", "somedata"); 194 | /// trie.insert("word2", "somemoredata"); 195 | /// 196 | /// let hard_data = vec![&"somedata"]; 197 | /// assert_eq!(hard_data, trie.get_data("word1", false).unwrap()); 198 | /// 199 | /// let soft_data = vec![&"somedata", &"somemoredata"]; 200 | /// let mut found_data = trie.get_data("word", true).unwrap(); 201 | /// found_data.sort(); 202 | /// assert_eq!(soft_data, found_data); 203 | /// ``` 204 | pub fn get_data(&self, query: &str, soft_match: bool) -> Option> { 205 | let current = self.get_final_node(query)?; 206 | 207 | return if soft_match { 208 | let mut soft_match_data = Vec::new(); 209 | current.generate_all_data(&mut soft_match_data); 210 | 211 | Some(soft_match_data) 212 | } else { 213 | current 214 | .get_association() 215 | .as_ref() 216 | .map(|data_vec| data_vec.iter().collect()) 217 | }; 218 | } 219 | 220 | /// Returns a vector of mutable references to data of some word that equals 'query' 221 | /// or mutable references to all found data of words that begin with 'query' 222 | /// when 'soft_match' is set to true. 223 | /// If the word is not found and 'soft_match' is set to false, None is returned. 
224 | /// # Examples 225 | /// ``` 226 | /// use basic_trie::DataTrie; 227 | /// let mut trie = DataTrie::new(); 228 | /// 229 | /// trie.insert("word1", "somedata"); 230 | /// trie.insert("word2", "somemoredata"); 231 | /// trie.insert("word1", "evenmoredata"); 232 | /// 233 | /// let mut found_data = trie.get_data_mut("word1", false).unwrap(); 234 | /// 235 | /// *found_data[0] = "changeddata"; 236 | /// *found_data[1] = "bigchanges"; 237 | /// 238 | /// let hard_data = vec![&"changeddata", &"bigchanges"]; 239 | /// assert_eq!(hard_data, trie.get_data("word1", false).unwrap()); 240 | /// 241 | /// let soft_data = vec![&"0", &"1", &"2"]; 242 | /// let mut found_data_mut = trie.get_data_mut("word", true).unwrap(); 243 | /// found_data_mut.sort(); 244 | /// *found_data_mut[0] = "0"; 245 | /// *found_data_mut[1] = "1"; 246 | /// *found_data_mut[2] = "2"; 247 | /// assert_eq!(soft_data, found_data_mut); 248 | /// ``` 249 | pub fn get_data_mut(&mut self, query: &str, soft_match: bool) -> Option> { 250 | let current = self.get_final_node_mut(query)?; // None when 'query' is not a path in the trie 251 | 252 | return if soft_match { 253 | let mut soft_match_data = Vec::new(); 254 | current.generate_all_data_mut(&mut soft_match_data); 255 | 256 | Some(soft_match_data) 257 | } else { 258 | current 259 | .get_association_mut() 260 | .as_mut() 261 | .map(|data_vec| data_vec.iter_mut().collect()) 262 | }; 263 | } 264 | 265 | /// Clears and returns the data of some word. If the word is not found, None is returned. 266 | /// If there is no data associated to the word, an empty vector is returned. 
267 | /// 268 | /// # Examples 269 | /// 270 | /// ``` 271 | /// use basic_trie::DataTrie; 272 | /// let mut trie = DataTrie::new(); 273 | /// 274 | /// trie.insert("word", "data1"); 275 | /// trie.insert("word", "data2"); 276 | /// trie.insert("word", "data3"); 277 | /// let found_data = trie.clear_data("word"); 278 | /// 279 | /// assert_eq!(Vec::<&&str>::new(), trie.get_data("word", false).unwrap()); 280 | /// assert_eq!(vec!["data1", "data2", "data3"], found_data.unwrap()); 281 | /// ``` 282 | pub fn clear_data(&mut self, word: &str) -> Option> { 283 | let current = self.get_final_node_mut(word)?; 284 | 285 | current 286 | .clear_word_end_association(true) 287 | .map(|data_vec| data_vec.into_iter().collect()) 288 | } 289 | 290 | /// Returns an option enum with a vector of owned strings 291 | /// representing all found words that begin with 'query'. 292 | /// If the character sequence 'query' is not present in the trie, None is returned. 293 | /// 294 | /// # Examples 295 | /// 296 | /// ``` 297 | /// use basic_trie::DataTrie; 298 | /// let mut data_trie = DataTrie::new(); 299 | /// 300 | /// data_trie.insert("word1", 1); 301 | /// data_trie.insert("word2", 2); 302 | /// 303 | /// let all_correct_words = vec![String::from("word1"), String::from("word2")]; 304 | /// let mut found_words = data_trie.get("word").unwrap(); 305 | /// found_words.sort(); 306 | /// assert_eq!(all_correct_words, found_words); 307 | /// ``` 308 | pub fn get(&self, query: &str) -> Option> { 309 | let mut substring = String::new(); 310 | let mut current_node = &self.root; 311 | let characters = get_characters(query); 312 | 313 | for character in characters { 314 | current_node = match current_node.children.get(character) { 315 | None => return None, 316 | Some(trie_node) => { 317 | substring.push_str(character); // rebuild the matched prefix as the walk proceeds 318 | trie_node 319 | } 320 | } 321 | } 322 | 323 | let mut words_vec = Vec::new(); 324 | current_node.find_words(&substring, &mut words_vec); // NOTE(review): presumably the prefix is prepended to each word found below this node - confirm in find_words 325 | 326 | Some(words_vec) 327 | } 328 | 329 | /// Returns the 
vector of longest words found in the trie. 330 | /// 331 | /// # Examples 332 | /// 333 | /// ``` 334 | /// use basic_trie::DataTrie; 335 | /// let mut data_trie = DataTrie::new(); 336 | /// 337 | /// data_trie.insert("shortwrd", 1); 338 | /// data_trie.insert("verylongword", 2); 339 | /// data_trie.insert("somelongword", 2); 340 | /// 341 | /// let longest_words = vec![String::from("somelongword"), String::from("verylongword")]; 342 | /// let mut found_words = data_trie.get_longest(); 343 | /// found_words.sort(); 344 | /// assert_eq!(longest_words, found_words); 345 | /// ``` 346 | pub fn get_longest(&self) -> Vec { 347 | let mut words = Vec::new(); 348 | self.root.words_min_max("", &mut words, Ordering::Greater); // Ordering::Greater asks the shared helper for the longest words; get_shortest passes Ordering::Less 349 | words 350 | } 351 | 352 | /// Returns the vector of shortest words found in the trie; every word tied for the shortest length is included. 353 | /// 354 | /// # Examples 355 | /// 356 | /// ``` 357 | /// use basic_trie::DataTrie; 358 | /// let mut data_trie = DataTrie::new(); 359 | /// 360 | /// data_trie.insert("shortwrd", 1); 361 | /// data_trie.insert("rlyshort", 2); 362 | /// data_trie.insert("verylongword", 3); 363 | /// 364 | /// let shortest_word = vec![String::from("rlyshort"), String::from("shortwrd")]; 365 | /// let mut found_words = data_trie.get_shortest(); 366 | /// found_words.sort(); 367 | /// assert_eq!(shortest_word, found_words); 368 | /// ``` 369 | pub fn get_shortest(&self) -> Vec { 370 | let mut words = Vec::new(); 371 | self.root.words_min_max("", &mut words, Ordering::Less); // Ordering::Less asks the shared helper for the shortest words 372 | words 373 | } 374 | 375 | /// Returns the number of words in the trie. 
376 | /// 377 | /// # Examples 378 | /// 379 | /// ``` 380 | /// use basic_trie::DataTrie; 381 | /// let mut data_trie = DataTrie::new(); 382 | /// 383 | /// data_trie.insert("word1", 1); 384 | /// data_trie.insert("word2", 2); 385 | /// data_trie.insert("word3", 3); 386 | /// data_trie.insert("word4", 4); 387 | /// assert_eq!(4, data_trie.len()); 388 | /// 389 | /// data_trie.remove("word1"); 390 | /// assert_eq!(3, data_trie.len()); 391 | /// 392 | /// data_trie.remove_prefix("w"); 393 | /// assert_eq!(0, data_trie.len()); 394 | /// ``` 395 | pub fn len(&self) -> usize { 396 | self.len 397 | } 398 | 399 | /// Returns the number of words that start with 'prefix', not counting 'prefix' itself. 400 | /// If the sequence 'prefix' is not found, 0 is returned. 401 | /// 402 | /// # Examples 403 | /// ``` 404 | /// use basic_trie::DataTrie; 405 | /// let mut data_trie = DataTrie::new(); 406 | /// 407 | /// data_trie.insert("word1", 1); 408 | /// data_trie.insert("word2", 2); 409 | /// data_trie.insert("word3", 3); 410 | /// data_trie.insert("word4", 4); 411 | /// data_trie.insert("word", 0); 412 | /// assert_eq!(4, data_trie.len_prefix("word")); 413 | /// ``` 414 | pub fn len_prefix(&self, prefix: &str) -> usize { 415 | match self.get_final_node(prefix) { 416 | None => 0, 417 | Some(node) => node.count_words() - node.is_associated() as usize, // subtract one when 'prefix' is itself a word, so only longer words are counted 418 | } 419 | } 420 | 421 | /// Returns a vector of owned strings 422 | /// representing all words in the trie. 423 | /// Order is not guaranteed. 
424 | /// 425 | /// # Examples 426 | /// 427 | /// ``` 428 | /// use basic_trie::DataTrie; 429 | /// let mut data_trie = DataTrie::new(); 430 | /// 431 | /// data_trie.insert("word1", 1); 432 | /// data_trie.insert("word2", 2); 433 | /// data_trie.insert("word3", 3); 434 | /// data_trie.insert("word4", 4); 435 | /// data_trie.insert("word5", 5); 436 | /// 437 | /// let all_words = vec![ 438 | /// String::from("word1"), String::from("word2"), String::from("word3"), 439 | /// String::from("word4"), String::from("word5") 440 | /// ]; 441 | /// 442 | /// let mut found_words = data_trie.get_all(); 443 | /// found_words.sort(); 444 | /// 445 | /// assert_eq!(all_words, found_words); 446 | /// ``` 447 | pub fn get_all(&self) -> Vec { 448 | self.get("").unwrap() // the empty query stops at the root, so `get` returns Some here 449 | } 450 | 451 | /// Returns true if the trie contains 'query' as a word. 452 | /// 453 | /// # Examples 454 | /// 455 | /// ``` 456 | /// use basic_trie::DataTrie; 457 | /// let mut data_trie = DataTrie::new(); 458 | /// 459 | /// data_trie.insert("word", 0); 460 | /// assert!(data_trie.contains("word")); 461 | /// assert!(!data_trie.contains("notfound")); 462 | /// ``` 463 | pub fn contains(&self, query: &str) -> bool { 464 | self.get_final_node(query) 465 | .map_or(false, |node| node.is_associated()) 466 | } 467 | 468 | /// Returns true if no words are in the trie. 469 | /// 470 | /// # Examples 471 | /// 472 | /// ``` 473 | /// use basic_trie::DataTrie; 474 | /// let mut data_trie = DataTrie::new(); 475 | /// 476 | /// data_trie.insert("word", 0); 477 | /// data_trie.remove("word"); 478 | /// 479 | /// assert!(data_trie.is_empty()); 480 | /// ``` 481 | pub fn is_empty(&self) -> bool { 482 | self.len == 0 483 | } 484 | 485 | /// Removes all words from the trie. 
486 | /// 487 | /// # Examples 488 | /// 489 | /// ``` 490 | /// use basic_trie::DataTrie; 491 | /// let mut data_trie = DataTrie::new(); 492 | /// 493 | /// data_trie.insert("word1", 1); 494 | /// data_trie.insert("word2", 2); 495 | /// data_trie.insert("word3", 3); 496 | /// data_trie.insert("word4", 4); 497 | /// 498 | /// data_trie.clear(); 499 | /// assert!(data_trie.is_empty()); 500 | /// assert_eq!(0, data_trie.len()); 501 | /// ``` 502 | pub fn clear(&mut self) { 503 | self.root.clear_children(); 504 | self.len = 0; 505 | } 506 | 507 | /// Follows 'query' character by character from the root and returns the node reached, or None if a character has no matching child. 508 | fn get_final_node(&self, query: &str) -> Option<&TrieDataNode> { 509 | let mut current = &self.root; 510 | 511 | for character in get_characters(query) { 512 | current = match current.children.get(character) { 513 | None => return None, 514 | Some(next_node) => next_node, 515 | } 516 | } 517 | 518 | Some(current) 519 | } 520 | 521 | /// Mutable counterpart of `get_final_node`: follows 'query' from the root and returns the node reached, or None if a character has no matching child. 522 | fn get_final_node_mut(&mut self, query: &str) -> Option<&mut TrieDataNode> { 523 | let mut current = &mut self.root; 524 | 525 | for character in get_characters(query) { 526 | current = match current.children.get_mut(character) { 527 | None => return None, 528 | Some(next_node) => next_node, 529 | } 530 | } 531 | 532 | Some(current) 533 | } 534 | } 535 | 536 | impl ops::Add for DataTrie { 537 | type Output = DataTrie; 538 | 539 | /// Operation + merges two tries, leaving out duplicate words. 540 | /// The smaller trie is always added to the larger one for efficiency. 
541 | /// 542 | /// # Examples 543 | /// 544 | /// ``` 545 | /// use basic_trie::DataTrie; 546 | /// let mut data_trie_1 = DataTrie::new(); 547 | /// data_trie_1.insert("word1", 1); 548 | /// data_trie_1.insert("word2", 2); 549 | /// data_trie_1.insert("word", 0); 550 | /// 551 | /// let mut data_trie_2 = DataTrie::new(); 552 | /// data_trie_2.insert("word3", 3); 553 | /// data_trie_2.insert_no_data("word"); 554 | /// 555 | /// let mut correct = DataTrie::new(); 556 | /// correct.insert("word", 0); 557 | /// correct.insert("word1", 1); 558 | /// correct.insert("word2", 2); 559 | /// correct.insert("word3", 3); 560 | /// 561 | /// let data_trie_3 = data_trie_1 + data_trie_2; 562 | /// 563 | /// assert_eq!(data_trie_3, correct); 564 | /// ``` 565 | fn add(self, rhs: Self) -> Self::Output { 566 | let (smaller, mut bigger) = if self.len < rhs.len { // on a tie, `self` is the trie that gets merged into 567 | (self, rhs) 568 | } else { 569 | (rhs, self) 570 | }; 571 | 572 | bigger.root += smaller.root; 573 | 574 | // A word present in both tries is kept only once, so the two lengths cannot simply be added; recount from the merged root. 575 | bigger.len = bigger.root.count_words(); 576 | 577 | bigger 578 | } 579 | } 580 | 581 | impl ops::AddAssign for DataTrie { 582 | /// Operation += merges two tries, leaving out duplicate words. 
583 | /// 584 | /// # Examples 585 | /// 586 | /// ``` 587 | /// use basic_trie::DataTrie; 588 | /// let mut data_trie_1 = DataTrie::new(); 589 | /// data_trie_1.insert("word1", 1); 590 | /// data_trie_1.insert("word2", 2); 591 | /// data_trie_1.insert("word", 0); 592 | /// 593 | /// let mut data_trie_2 = DataTrie::new(); 594 | /// data_trie_2.insert("word3", 3); 595 | /// data_trie_2.insert_no_data("word"); 596 | /// 597 | /// let mut correct = DataTrie::new(); 598 | /// correct.insert("word", 0); 599 | /// correct.insert("word1", 1); 600 | /// correct.insert("word2", 2); 601 | /// correct.insert("word3", 3); 602 | /// 603 | /// data_trie_1 += data_trie_2; 604 | /// 605 | /// assert_eq!(data_trie_1, correct); 606 | /// ``` 607 | fn add_assign(&mut self, rhs: Self) { 608 | self.root += rhs.root; 609 | 610 | // A word present in both tries is kept only once, so the two lengths cannot simply be added; recount from the merged root. 611 | self.len = self.root.count_words(); 612 | } 613 | } 614 | 615 | impl PartialEq for DataTrie { 616 | /// Operation '==' compares the word counts and then the root nodes; it can be applied only to tries whose data implements PartialEq. 617 | /// 618 | /// # Examples 619 | /// 620 | /// ``` 621 | /// use basic_trie::DataTrie; 622 | /// let mut data_trie_1 = DataTrie::new(); 623 | /// data_trie_1.insert("test", 1); 624 | /// 625 | /// let mut data_trie_2 = DataTrie::new(); 626 | /// data_trie_2.insert("test", 1); 627 | /// 628 | /// assert_eq!(data_trie_1, data_trie_2); 629 | /// 630 | /// data_trie_2.insert("test2", 2); 631 | /// 632 | /// assert_ne!(data_trie_1, data_trie_2); 633 | /// ``` 634 | fn eq(&self, other: &Self) -> bool { 635 | self.len == other.len && self.root == other.root 636 | } 637 | } 638 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # Basic Trie 2 | //! 3 | //! 
[![Test CI](https://github.com/lukascobbler/basic_trie/actions/workflows/rust.yml/badge.svg)](https://github.com/lukascobbler/basic_trie/actions/workflows/rust.yml) 4 | //! 5 | //! The trie data structure is used for quick access to words and 6 | //! data that should (could) be associated with them. 7 | //! 8 | //! **Basic Trie** is implemented as a tree where each node holds a single character 9 | //! that could point at any other character thus allowing insertion of arbitrary words. 10 | //! 11 | //! #### There are two major implementations: 12 | //! - Trie where words are inserted with nothing attached to them 13 | //! - Data Trie where each word has a corresponding vector of data attached to it 14 | //! 15 | //! Regular tries are often used for word lookups and prefix matching, and data tries are 16 | //! often used for finding all data that is connected to some prefix. 17 | //! 18 | //! For example, when inserting a whole book in the trie, you could insert every word with 19 | //! the corresponding page number it's on. Later when searching for the word, you could get all 20 | //! the pages the word is on with no added performance cost. 21 | //! 22 | //! ## Global features 23 | //! - insertion / removal of words 24 | //! - fast contains check 25 | //! - finding words based on a prefix 26 | //! - longest / shortest words in the trie 27 | //! - generic methods: `is_empty`, `len`, `clear` 28 | //! - Trie equality with `==` 29 | //! - Trie merging with `+` or `+=` 30 | //! 31 | //! ## Data Trie features 32 | //! - generic type implementation for associating a word to any type, with zero trait constraints 33 | //! - finding data of words based on exact match or prefix 34 | //! 35 | //! ## Optional features 36 | //! - unicode support via the 'unicode' feature with the `unicode-segmentation` crate (enabled by default) 37 | //! - data trie support via the 'data' feature (enabled by default) 38 | //! 
- serialization and deserialization via the 'serde' feature with the `serde` crate 39 | //! 40 | //! ## Dependencies 41 | //! - `unicode-segmentation` (enabled by default) 42 | //! - `serde` (only with 'serde' feature flag) 43 | //! - `fxhash` 44 | //! - `thin-vec` 45 | //! - `arrayvec` 46 | //! 47 | //! ## License 48 | //! The software is licensed under the MIT license. 49 | //! 50 | //! ## Examples 51 | //! 52 | //! ```rust 53 | //! use basic_trie::Trie; 54 | //! 55 | //! let mut trie = Trie::new(); 56 | //! trie.insert("eat"); 57 | //! trie.insert("eating"); 58 | //! trie.insert("wizard"); 59 | //! 60 | //! let mut found_longest_words = trie.get_longest(); 61 | //! found_longest_words.sort(); 62 | //! 63 | //! assert!(trie.contains("wizard")); 64 | //! assert_eq!(vec![String::from("eating"), String::from("wizard")], found_longest_words); 65 | //! assert_eq!(vec![String::from("eat")], trie.get_shortest()); 66 | //! assert_eq!(3, trie.len()); 67 | //! ``` 68 | //! 69 | //! ```rust 70 | //! use basic_trie::DataTrie; 71 | //! 72 | //! let mut data_trie = DataTrie::::new(); 73 | //! data_trie.insert("apple", 1); 74 | //! data_trie.insert("apple", 2); 75 | //! data_trie.insert_no_data("banana"); 76 | //! data_trie.insert("avocado", 15); 77 | //! 78 | //! let mut found_data = data_trie.get_data("apple", false).unwrap(); 79 | //! found_data.sort(); 80 | //! assert_eq!(vec![&1, &2], found_data); 81 | //! 82 | //! let mut found_data = data_trie.get_data("a", true).unwrap(); 83 | //! found_data.sort(); 84 | //! assert_eq!(vec![&1, &2, &15], found_data); 85 | //! 86 | //! assert_eq!(vec![15], data_trie.remove("avocado").unwrap()); 87 | //! ``` 88 | //! 89 | //! ## Changelog 90 | //! - **2.0.0** – Major redesign: increased memory efficiency for the regular Trie (formerly the Dataless Trie); 91 | //! renamed API methods to better match the standard library; split the two implementations code-wise, 92 | //! which fixes the bug where the documentation did not render. 93 | //! 
- **1.2.3** – Adding dependencies for even more memory layout optimisations. 94 | //! - **1.2.2** – More memory optimisations with Box. 95 | //! - **1.2.1** – Memory performance upgrade with Box. Mutable data retrieval. 96 | //! - **1.2.0** – Equality and addition operators support between 97 | //! same Trie types via `==`, `+` and `+=`. 98 | //! - **1.1.1** – Adding `FxHashMap` dependency for boosted performance. 99 | //! - **1.1.0** – Serialization with the `serde` crate and the 'serde' feature. 100 | //! - **1.0.3** – Optimisation of `number_of_words()`. Removing lifetime requirements 101 | //! for word insertion for much better flexibility at the same logical memory cost. 102 | //! - **1.0.2** – Bug fixes. 103 | //! - **1.0.1** – `insert_no_data()` for `DataTrie`. Bugfixes. 104 | //! - **1.0.0** – Separation of `DataTrie` and `DatalessTrie`. Optimizing 105 | //! performance for `DatalessTrie`. Incompatible with older versions. 106 | //! - **<1.0.0** – Simple `Trie` with data and base features. 107 | //! 108 | mod trie; 109 | mod trie_node; 110 | 111 | #[cfg(feature = "data")] 112 | pub use trie::DataTrie; 113 | 114 | pub use trie::Trie; 115 | 116 | // Tests which are the same for both implementations, 117 | // Regular is used for less verbose code. 
118 | #[cfg(test)] 119 | mod general_trie_tests { 120 | use crate::Trie; 121 | 122 | #[test] 123 | fn find_words() { 124 | let found_words_correct = vec![ 125 | String::from("word1"), 126 | String::from("word2"), 127 | String::from("word3"), 128 | ]; 129 | 130 | let mut trie = Trie::new(); 131 | 132 | trie.insert("word1"); 133 | trie.insert("word2"); 134 | trie.insert("word3"); 135 | 136 | let mut found_words = trie.get("word").unwrap(); 137 | found_words.sort(); 138 | assert_eq!(found_words, found_words_correct); 139 | } 140 | 141 | #[test] 142 | fn longest_word() { 143 | let mut trie = Trie::new(); 144 | 145 | trie.insert("a"); 146 | assert_eq!(trie.get_longest(), vec![String::from("a")]); 147 | 148 | trie.insert("aa"); 149 | assert_eq!(trie.get_longest(), vec![String::from("aa")]); 150 | 151 | trie.insert("aaa"); 152 | assert_eq!(trie.get_longest(), vec![String::from("aaa")]); 153 | 154 | trie.insert("aaaa"); 155 | assert_eq!(trie.get_longest(), vec![String::from("aaaa")]); 156 | 157 | trie.insert("a"); 158 | assert_eq!(trie.get_longest(), vec![String::from("aaaa")]); 159 | } 160 | 161 | #[test] 162 | fn multiple_longest_words() { 163 | let mut trie = Trie::new(); 164 | 165 | trie.insert("abba"); 166 | trie.insert("cddc"); 167 | 168 | let mut found_words = trie.get_longest(); 169 | found_words.sort(); 170 | 171 | assert_eq!( 172 | vec![String::from("abba"), String::from("cddc")], 173 | found_words 174 | ); 175 | } 176 | 177 | #[test] 178 | fn shortest_word() { 179 | let mut trie = Trie::new(); 180 | 181 | trie.insert("a"); 182 | assert_eq!(trie.get_shortest(), vec![String::from("a")]); 183 | 184 | trie.insert("aa"); 185 | assert_eq!(trie.get_shortest(), vec![String::from("a")]); 186 | 187 | trie.insert("aaa"); 188 | assert_eq!(trie.get_shortest(), vec![String::from("a")]); 189 | 190 | trie.insert("aaaa"); 191 | assert_eq!(trie.get_shortest(), vec![String::from("a")]); 192 | 193 | trie.insert("a"); 194 | assert_eq!(trie.get_shortest(), vec![String::from("a")]); 
195 | } 196 | 197 | #[test] 198 | fn multiple_shortest_words() { 199 | let mut trie = Trie::new(); 200 | 201 | trie.insert("aaa"); 202 | trie.insert("aaaa"); 203 | trie.insert("aa"); 204 | trie.insert("bb"); 205 | 206 | let mut found_words = trie.get_shortest(); 207 | found_words.sort(); 208 | 209 | assert_eq!(vec![String::from("aa"), String::from("bb")], found_words); 210 | } 211 | 212 | #[test] 213 | fn number_of_words() { 214 | let mut trie = Trie::new(); 215 | 216 | trie.insert("a"); 217 | trie.insert("b"); 218 | trie.insert("c"); 219 | trie.insert("d"); 220 | 221 | assert_eq!(4, trie.len()); 222 | } 223 | 224 | #[test] 225 | fn same_word_twice() { 226 | let mut trie = Trie::new(); 227 | 228 | trie.insert("twice"); 229 | trie.insert("twice"); 230 | 231 | assert_eq!(vec!["twice"], trie.get("twice").unwrap()); 232 | } 233 | 234 | #[test] 235 | fn all_words() { 236 | let mut trie = Trie::new(); 237 | 238 | trie.insert("a"); 239 | trie.insert("ab"); 240 | trie.insert("abc"); 241 | trie.insert("abcd"); 242 | 243 | let all_words = vec![ 244 | String::from("a"), 245 | String::from("ab"), 246 | String::from("abc"), 247 | String::from("abcd"), 248 | ]; 249 | 250 | assert_eq!(all_words, trie.get_all()) 251 | } 252 | 253 | #[cfg(feature = "unicode")] 254 | #[test] 255 | fn unicode() { 256 | let mut trie = Trie::new(); 257 | 258 | trie.insert("а"); 259 | trie.insert("аб"); 260 | trie.insert("абц"); 261 | trie.insert("абцд"); 262 | 263 | let all_words = vec![ 264 | String::from("а"), 265 | String::from("аб"), 266 | String::from("абц"), 267 | String::from("абцд"), 268 | ]; 269 | 270 | assert_eq!(all_words, trie.get_all()) 271 | } 272 | 273 | #[test] 274 | fn clear() { 275 | let mut trie = Trie::new(); 276 | trie.insert("word1"); 277 | trie.insert("word2"); 278 | trie.insert("word3"); 279 | trie.insert("word4"); 280 | trie.insert("word5"); 281 | 282 | trie.clear(); 283 | } 284 | } 285 | 286 | #[cfg(feature = "data")] 287 | #[cfg(test)] 288 | mod data_trie_tests { 289 | use 
super::DataTrie; 290 | 291 | #[test] 292 | fn find_data_soft_match() { 293 | let found_data_correct = vec![&1, &2, &3]; 294 | 295 | let mut trie = DataTrie::new(); 296 | 297 | trie.insert("word1", 1); 298 | trie.insert("word2", 2); 299 | trie.insert("word3", 3); 300 | 301 | let mut found_data = trie.get_data("word", true).unwrap(); 302 | found_data.sort(); 303 | assert_eq!(found_data, found_data_correct); 304 | } 305 | 306 | #[test] 307 | fn find_str_data_soft_match() { 308 | let found_data_correct = vec![&"data1", &"data2", &"data3"]; 309 | 310 | let mut trie = DataTrie::new(); 311 | 312 | trie.insert("word1", "data1"); 313 | trie.insert("word2", "data2"); 314 | trie.insert("word3", "data3"); 315 | 316 | let mut found_data = trie.get_data("word", true).unwrap(); 317 | found_data.sort(); 318 | assert_eq!(found_data, found_data_correct); 319 | } 320 | 321 | #[test] 322 | fn find_data_hard_match() { 323 | let found_data_correct = vec![&1]; 324 | 325 | let mut trie = DataTrie::new(); 326 | 327 | trie.insert("word1", 1); 328 | trie.insert("word2", 2); 329 | trie.insert("word3", 3); 330 | 331 | let mut found_data = trie.get_data("word1", false).unwrap(); 332 | found_data.sort(); 333 | assert_eq!(found_data, found_data_correct); 334 | } 335 | 336 | #[test] 337 | fn find_data_hard_match_not_found() { 338 | let found_data_correct = None; 339 | 340 | let mut trie = DataTrie::new(); 341 | 342 | trie.insert("word1", 1); 343 | trie.insert("word2", 2); 344 | trie.insert("word3", 3); 345 | 346 | let found_data = trie.get_data("word", false); 347 | 348 | assert_eq!(found_data, found_data_correct); 349 | } 350 | 351 | #[test] 352 | fn same_word_twice_different_data() { 353 | let mut trie = DataTrie::new(); 354 | 355 | trie.insert("twice", 5); 356 | trie.insert("twice", 3); 357 | 358 | assert_eq!(vec![&5, &3], trie.get_data("twice", true).unwrap()); 359 | } 360 | 361 | #[test] 362 | fn clear_word_data() { 363 | let mut trie = DataTrie::new(); 364 | 365 | trie.insert("twice", 5); 
366 | let data = trie.clear_data("twice"); 367 | trie.insert("twice", 3); 368 | 369 | assert_eq!(vec![&3], trie.get_data("twice", true).unwrap()); 370 | assert_eq!(vec![5], data.unwrap()); 371 | } 372 | 373 | #[test] 374 | fn clear_word_no_data() { 375 | let mut trie = DataTrie::new(); 376 | 377 | trie.insert("word1", 5); 378 | let data = trie.clear_data("word2"); 379 | 380 | assert_eq!(None, data); 381 | } 382 | 383 | #[test] 384 | fn remove_word1() { 385 | let mut trie = DataTrie::new(); 386 | 387 | trie.insert("a", 5); 388 | trie.insert("ab", 5); 389 | trie.insert("abc", 5); 390 | trie.insert("abcd", 5); 391 | 392 | trie.remove("a"); 393 | 394 | let all_words = vec![ 395 | String::from("ab"), 396 | String::from("abc"), 397 | String::from("abcd"), 398 | ]; 399 | 400 | assert_eq!(all_words, trie.get_all()) 401 | } 402 | 403 | #[test] 404 | fn remove_word_final() { 405 | let mut trie = DataTrie::new(); 406 | 407 | trie.insert("a", 5); 408 | trie.insert("ab", 5); 409 | trie.insert("abc", 5); 410 | trie.insert("abcd", 5); 411 | 412 | trie.remove("abcd"); 413 | 414 | let all_correct_words = vec![String::from("a"), String::from("ab"), String::from("abc")]; 415 | 416 | let mut all_words = trie.get_all(); 417 | all_words.sort(); 418 | 419 | assert_eq!(all_correct_words, all_words); 420 | } 421 | 422 | #[test] 423 | fn remove_word_2() { 424 | let mut trie = DataTrie::new(); 425 | 426 | trie.insert("a", 5); 427 | trie.insert("ab", 5); 428 | trie.insert("abc", 5); 429 | trie.insert("abcd", 5); 430 | 431 | trie.remove("abc"); 432 | 433 | let all_correct_words = vec![String::from("a"), String::from("ab"), String::from("abcd")]; 434 | 435 | let mut all_words = trie.get_all(); 436 | all_words.sort(); 437 | 438 | assert_eq!(all_correct_words, all_words); 439 | assert_eq!(vec![&5, &5, &5], trie.get_data("a", true).unwrap()); 440 | } 441 | 442 | #[test] 443 | fn remove_word_3() { 444 | let mut trie = DataTrie::new(); 445 | 446 | trie.insert("eat", 5); 447 | trie.insert("eating", 
5); 448 | trie.insert("eats", 5); 449 | trie.insert("eatings", 5); 450 | 451 | trie.remove("eating"); 452 | 453 | let all_correct_words = vec![ 454 | String::from("eat"), 455 | String::from("eatings"), 456 | String::from("eats"), 457 | ]; 458 | 459 | let mut all_words = trie.get_all(); 460 | all_words.sort(); 461 | 462 | assert_eq!(all_correct_words, all_words); 463 | } 464 | 465 | #[test] 466 | fn remove_word_4() { 467 | let mut trie = DataTrie::new(); 468 | 469 | trie.insert("eat", 5); 470 | trie.insert("eating", 5); 471 | trie.insert("eats", 5); 472 | trie.insert("eatings", 5); 473 | 474 | trie.remove("eatings"); 475 | 476 | let all_correct_words = vec![ 477 | String::from("eat"), 478 | String::from("eating"), 479 | String::from("eats"), 480 | ]; 481 | 482 | let mut all_words = trie.get_all(); 483 | all_words.sort(); 484 | 485 | assert_eq!(all_correct_words, all_words); 486 | } 487 | 488 | #[test] 489 | fn remove_word_5() { 490 | let mut trie = DataTrie::new(); 491 | 492 | trie.insert("eat", 5); 493 | trie.insert("eating", 5); 494 | trie.insert("eats", 5); 495 | trie.insert("eatings", 5); 496 | 497 | let data = trie.remove("eatin"); 498 | 499 | let all_correct_words = vec![ 500 | String::from("eat"), 501 | String::from("eating"), 502 | String::from("eatings"), 503 | String::from("eats"), 504 | ]; 505 | 506 | let mut all_words = trie.get_all(); 507 | all_words.sort(); 508 | 509 | assert_eq!(all_correct_words, all_words); 510 | assert_eq!(None, data); 511 | } 512 | 513 | #[test] 514 | fn remove_word_6() { 515 | let mut trie = DataTrie::new(); 516 | 517 | trie.insert("eat", 5); 518 | trie.insert("eatings", 5); 519 | 520 | trie.remove("eatings"); 521 | 522 | let all_correct_words = vec![String::from("eat")]; 523 | 524 | let mut all_words = trie.get_all(); 525 | all_words.sort(); 526 | 527 | assert_eq!(all_correct_words, all_words); 528 | } 529 | 530 | #[test] 531 | fn remove_word_7() { 532 | let mut trie = DataTrie::new(); 533 | 534 | trie.insert("eat", 3); 535 | 
trie.insert("eatings", 5); 536 | 537 | let data1 = trie.remove("eatings"); 538 | 539 | let all_correct_words = vec![String::from("eat")]; 540 | 541 | let mut all_words = trie.get_all(); 542 | all_words.sort(); 543 | 544 | assert_eq!(all_correct_words, all_words); 545 | 546 | assert_eq!(vec![5], data1.unwrap()); 547 | 548 | let data2 = trie.remove("eat"); 549 | 550 | assert_eq!(vec![3], data2.unwrap()); 551 | } 552 | 553 | #[test] 554 | fn remove_word_8() { 555 | let mut trie = DataTrie::new(); 556 | 557 | trie.insert("eat", 3); 558 | trie.insert("eats", 4); 559 | trie.insert("eatings", 5); 560 | 561 | let data = trie.remove("eats"); 562 | 563 | let all_correct_words = vec![String::from("eat"), String::from("eatings")]; 564 | 565 | let mut all_words = trie.get_all(); 566 | all_words.sort(); 567 | 568 | assert_eq!(all_correct_words, all_words); 569 | assert_eq!(vec![4], data.unwrap()); 570 | 571 | let mut remaining_data = trie.get_data("eat", true).unwrap(); 572 | remaining_data.sort(); 573 | 574 | assert_eq!(vec![&3, &5], remaining_data); 575 | } 576 | 577 | #[test] 578 | fn remove_prefix_1() { 579 | let mut trie = DataTrie::new(); 580 | 581 | trie.insert("eat", 3); 582 | trie.insert("eating", 4); 583 | trie.insert("eats", 5); 584 | trie.insert("eatings", 6); 585 | trie.insert("ea", 7); 586 | 587 | let mut removed_data = trie.remove_prefix("ea").unwrap(); 588 | removed_data.sort(); 589 | 590 | assert_eq!(vec![String::from("ea")], trie.get_all()); 591 | assert_eq!(vec![3, 4, 5, 6], removed_data); 592 | assert_eq!(1, trie.len()); 593 | } 594 | 595 | #[test] 596 | fn remove_prefix_2() { 597 | let mut trie = DataTrie::new(); 598 | 599 | trie.insert("a1", 3); 600 | trie.insert("b2", 4); 601 | trie.insert("c3", 5); 602 | 603 | let mut removed_data = trie.remove_prefix("").unwrap(); 604 | removed_data.sort(); 605 | 606 | assert_eq!(Vec::::new(), trie.get_all()); 607 | assert!(trie.is_empty()); 608 | assert_eq!(0, trie.len()); 609 | assert_eq!(vec![3, 4, 5], removed_data); 
610 | } 611 | 612 | #[cfg(feature = "unicode")] 613 | #[test] 614 | fn unicode_data() { 615 | let mut trie = DataTrie::new(); 616 | 617 | trie.insert("а", 5); 618 | trie.insert("аб", 5); 619 | trie.insert("абц", 5); 620 | trie.insert("абцд", 5); 621 | 622 | let all_data = vec![&5, &5, &5, &5]; 623 | 624 | assert_eq!(all_data, trie.get_data("а", true).unwrap()) 625 | } 626 | 627 | #[test] 628 | fn insert_no_data() { 629 | let mut trie = DataTrie::<&str>::new(); 630 | 631 | trie.insert_no_data("word1"); 632 | assert_eq!(vec![String::from("word1")], trie.get_all()); 633 | 634 | trie.insert("word1", "somedata"); 635 | assert_eq!(vec![&"somedata"], trie.get_data("word1", false).unwrap()); 636 | } 637 | 638 | #[test] 639 | fn equals_1() { 640 | let mut data_trie_1 = DataTrie::new(); 641 | data_trie_1.insert("test", 1); 642 | 643 | let mut data_trie_2 = DataTrie::new(); 644 | data_trie_2.insert("test", 1); 645 | 646 | assert_eq!(data_trie_1, data_trie_2); 647 | } 648 | 649 | #[test] 650 | fn equals_2() { 651 | let mut data_trie_1 = DataTrie::new(); 652 | data_trie_1.insert("test", 1); 653 | 654 | let mut data_trie_2 = DataTrie::new(); 655 | data_trie_2.insert("test", 1); 656 | data_trie_2.insert("test2", 1); 657 | 658 | assert_ne!(data_trie_1, data_trie_2); 659 | } 660 | 661 | #[test] 662 | fn equals_3() { 663 | let mut data_trie_1 = DataTrie::new(); 664 | data_trie_1.insert("test", 1); 665 | data_trie_1.insert("test2", 1); 666 | 667 | let mut data_trie_2 = DataTrie::new(); 668 | data_trie_2.insert("test", 1); 669 | 670 | assert_ne!(data_trie_1, data_trie_2); 671 | } 672 | 673 | #[test] 674 | fn add_two_tries_1() { 675 | let mut t1 = DataTrie::::new(); 676 | t1.insert("word1", 1000); 677 | t1.insert("word2", 1000); 678 | t1.insert("apple", 1000); 679 | t1.insert("banana", 1000); 680 | 681 | let mut t2 = DataTrie::::new(); 682 | t2.insert("word3", 1000); 683 | t2.insert("word4", 1000); 684 | t2.insert("potato", 1000); 685 | t2.insert("watermelon", 1000); 686 | 687 | let t3 
= t1 + t2; 688 | 689 | let mut correct = DataTrie::<i32>::new(); 690 | correct.insert("word1", 1000); 691 | correct.insert("word2", 1000); 692 | correct.insert("apple", 1000); 693 | correct.insert("banana", 1000); 694 | correct.insert("word3", 1000); 695 | correct.insert("word4", 1000); 696 | correct.insert("potato", 1000); 697 | correct.insert("watermelon", 1000); 698 | 699 | let mut t3_words = t3.get_all(); 700 | let mut correct_words = correct.get_all(); 701 | 702 | t3_words.sort(); 703 | correct_words.sort(); 704 | assert_eq!(t3_words, correct_words); 705 | assert_eq!(t3, correct); 706 | 707 | let t3_data = t3.get_data("", true).unwrap(); 708 | assert_eq!(t3_data, Vec::from([&1000; 8])); 709 | } 710 | 711 | #[test] 712 | fn add_two_tries_2() { 713 | let mut t1 = DataTrie::<i32>::new(); 714 | t1.insert("word1", 1000); 715 | t1.insert("word2", 1000); 716 | t1.insert("apple", 1000); 717 | t1.insert("banana", 1000); 718 | 719 | let mut t2 = DataTrie::<i32>::new(); 720 | t2.insert("word3", 1000); 721 | t2.insert("word4", 1000); 722 | t2.insert("potato", 1000); 723 | t2.insert("watermelon", 1000); 724 | 725 | t1 += t2; 726 | 727 | let mut correct = DataTrie::<i32>::new(); 728 | correct.insert("word1", 1000); 729 | correct.insert("word2", 1000); 730 | correct.insert("apple", 1000); 731 | correct.insert("banana", 1000); 732 | correct.insert("word3", 1000); 733 | correct.insert("word4", 1000); 734 | correct.insert("potato", 1000); 735 | correct.insert("watermelon", 1000); 736 | 737 | let mut t1_words = t1.get_all(); 738 | let mut correct_words = correct.get_all(); 739 | 740 | t1_words.sort(); 741 | correct_words.sort(); 742 | assert_eq!(t1_words, correct_words); 743 | assert_eq!(t1, correct); 744 | 745 | let t1_data = t1.get_data("", true).unwrap(); 746 | assert_eq!(t1_data, Vec::from([&1000; 8])); 747 | } 748 | 749 | #[test] 750 | fn add_two_tries_3() { 751 | let mut t1 = DataTrie::<i32>::new(); 752 | t1.insert("word1", 500); 753 | 754 | let mut t2 = DataTrie::<i32>::new(); 755 | t2.insert("word2", 
500); 756 | t2.insert("word", 500); 757 | 758 | t1 += t2; 759 | 760 | let mut correct = DataTrie::<i32>::new(); 761 | correct.insert("word", 500); 762 | correct.insert("word1", 500); 763 | correct.insert("word2", 500); 764 | 765 | let mut t1_words = t1.get_all(); 766 | let mut correct_words = correct.get_all(); 767 | 768 | t1_words.sort(); 769 | correct_words.sort(); 770 | assert_eq!(t1_words, correct_words); 771 | assert_eq!(t1, correct); 772 | 773 | let t1_data = t1.get_data("", true).unwrap(); 774 | assert_eq!(t1_data, Vec::from([&500; 3])); 775 | } 776 | 777 | #[test] 778 | fn add_two_tries_4() { 779 | let mut t1 = DataTrie::<i32>::new(); 780 | t1.insert("word1", 500); 781 | t1.insert("word1", 500); 782 | t1.insert("word1", 500); 783 | 784 | let mut t2 = DataTrie::<i32>::new(); 785 | t2.insert("word1", 500); 786 | t2.insert("word1", 500); 787 | t2.insert("word1", 500); 788 | 789 | t1 += t2; 790 | 791 | let mut correct = DataTrie::<i32>::new(); 792 | correct.insert("word1", 500); 793 | 794 | let mut t1_words = t1.get_all(); 795 | let mut correct_words = correct.get_all(); 796 | 797 | t1_words.sort(); 798 | correct_words.sort(); 799 | assert_eq!(t1_words, correct_words); 800 | 801 | let t1_data = t1.get_data("", true).unwrap(); 802 | assert_eq!(t1_data, Vec::from([&500; 6])); 803 | } 804 | 805 | #[test] 806 | fn add_two_tries_5() { 807 | let mut t1 = DataTrie::<i32>::new(); 808 | t1.insert("word1", 500); 809 | t1.insert("word1", 500); 810 | t1.insert("word1", 500); 811 | 812 | let mut t2 = DataTrie::<i32>::new(); 813 | t2.insert("word1", 500); 814 | t2.insert("word1", 500); 815 | t2.insert("word1", 500); 816 | 817 | t1 += t2; 818 | 819 | let mut correct = DataTrie::<i32>::new(); 820 | correct.insert("word1", 500); 821 | 822 | let mut t1_words = t1.get_all(); 823 | let mut correct_words = correct.get_all(); 824 | 825 | t1_words.sort(); 826 | correct_words.sort(); 827 | assert_eq!(t1_words, correct_words); 828 | 829 | let t1_data = t1.get_data("", true).unwrap(); 830 | assert_eq!(t1_data, 
Vec::from([&500; 6])); 831 | } 832 | } 833 | 834 | #[cfg(test)] 835 | mod regular_trie_tests { 836 | use crate::Trie; 837 | 838 | #[test] 839 | fn insert_no_data() { 840 | let mut trie = Trie::new(); 841 | 842 | let found_words_correct = vec![ 843 | String::from("word1"), 844 | String::from("word2"), 845 | String::from("word3"), 846 | ]; 847 | 848 | trie.insert("word1"); 849 | trie.insert("word2"); 850 | trie.insert("word3"); 851 | 852 | let mut found_words = trie.get("word").unwrap(); 853 | found_words.sort(); 854 | 855 | assert_eq!(found_words, found_words_correct); 856 | } 857 | 858 | #[test] 859 | fn remove_word1() { 860 | let mut trie = Trie::new(); 861 | 862 | trie.insert("a"); 863 | trie.insert("ab"); 864 | trie.insert("abc"); 865 | trie.insert("abcd"); 866 | 867 | trie.remove("a"); 868 | 869 | let all_words = vec![ 870 | String::from("ab"), 871 | String::from("abc"), 872 | String::from("abcd"), 873 | ]; 874 | 875 | assert_eq!(all_words, trie.get_all()) 876 | } 877 | 878 | #[test] 879 | fn remove_word_final() { 880 | let mut trie = Trie::new(); 881 | 882 | trie.insert("a"); 883 | trie.insert("ab"); 884 | trie.insert("abc"); 885 | trie.insert("abcd"); 886 | 887 | trie.remove("abcd"); 888 | 889 | let all_correct_words = vec![String::from("a"), String::from("ab"), String::from("abc")]; 890 | 891 | let mut all_words = trie.get_all(); 892 | all_words.sort(); 893 | 894 | assert_eq!(all_correct_words, all_words); 895 | } 896 | 897 | #[test] 898 | fn remove_word_2() { 899 | let mut trie = Trie::new(); 900 | 901 | trie.insert("a"); 902 | trie.insert("ab"); 903 | trie.insert("abc"); 904 | trie.insert("abcd"); 905 | 906 | trie.remove("abc"); 907 | 908 | let all_correct_words = vec![String::from("a"), String::from("ab"), String::from("abcd")]; 909 | 910 | let mut all_words = trie.get_all(); 911 | all_words.sort(); 912 | 913 | assert_eq!(all_correct_words, all_words); 914 | } 915 | 916 | #[test] 917 | fn remove_word_3() { 918 | let mut trie = Trie::new(); 919 | 920 | 
trie.insert("eat"); 921 | trie.insert("eating"); 922 | trie.insert("eats"); 923 | trie.insert("eatings"); 924 | 925 | trie.remove("eating"); 926 | 927 | let all_correct_words = vec![ 928 | String::from("eat"), 929 | String::from("eatings"), 930 | String::from("eats"), 931 | ]; 932 | 933 | let mut all_words = trie.get_all(); 934 | all_words.sort(); 935 | 936 | assert_eq!(all_correct_words, all_words); 937 | } 938 | 939 | #[test] 940 | fn remove_word_4() { 941 | let mut trie = Trie::new(); 942 | 943 | trie.insert("eat"); 944 | trie.insert("eating"); 945 | trie.insert("eats"); 946 | trie.insert("eatings"); 947 | 948 | trie.remove("eatings"); 949 | 950 | let all_correct_words = vec![ 951 | String::from("eat"), 952 | String::from("eating"), 953 | String::from("eats"), 954 | ]; 955 | 956 | let mut all_words = trie.get_all(); 957 | all_words.sort(); 958 | 959 | assert_eq!(all_correct_words, all_words); 960 | } 961 | 962 | #[test] 963 | fn remove_word_5() { 964 | let mut trie = Trie::new(); 965 | 966 | trie.insert("eat"); 967 | trie.insert("eating"); 968 | trie.insert("eats"); 969 | trie.insert("eatings"); 970 | 971 | trie.remove("eatin"); 972 | 973 | let all_correct_words = vec![ 974 | String::from("eat"), 975 | String::from("eating"), 976 | String::from("eatings"), 977 | String::from("eats"), 978 | ]; 979 | 980 | let mut all_words = trie.get_all(); 981 | all_words.sort(); 982 | 983 | assert_eq!(all_correct_words, all_words); 984 | } 985 | 986 | #[test] 987 | fn remove_word_6() { 988 | let mut trie = Trie::new(); 989 | 990 | trie.insert("eat"); 991 | trie.insert("eatings"); 992 | 993 | trie.remove("eatings"); 994 | 995 | let all_correct_words = vec![String::from("eat")]; 996 | 997 | let mut all_words = trie.get_all(); 998 | all_words.sort(); 999 | 1000 | assert_eq!(all_correct_words, all_words); 1001 | } 1002 | 1003 | #[test] 1004 | fn remove_word_7() { 1005 | let mut trie = Trie::new(); 1006 | 1007 | trie.insert("eat"); 1008 | trie.insert("eatings"); 1009 | 1010 | 
trie.remove("eatings"); 1011 | 1012 | let all_correct_words = vec![String::from("eat")]; 1013 | 1014 | let mut all_words = trie.get_all(); 1015 | all_words.sort(); 1016 | 1017 | assert_eq!(all_correct_words, all_words); 1018 | } 1019 | 1020 | #[test] 1021 | fn remove_word_8() { 1022 | let mut trie = Trie::new(); 1023 | 1024 | trie.insert("eat"); 1025 | trie.insert("eats"); 1026 | trie.insert("eating"); 1027 | 1028 | trie.remove("eats"); 1029 | 1030 | let all_correct_words = vec![String::from("eat"), String::from("eating")]; 1031 | 1032 | let mut all_words = trie.get_all(); 1033 | all_words.sort(); 1034 | 1035 | assert_eq!(all_correct_words, all_words); 1036 | } 1037 | 1038 | #[test] 1039 | fn remove_word_9() { 1040 | let mut trie = Trie::new(); 1041 | 1042 | trie.insert("123"); 1043 | trie.insert("1234"); 1044 | trie.insert("12345"); 1045 | 1046 | trie.remove("1234"); 1047 | 1048 | let all_correct_words = vec![String::from("123"), String::from("12345")]; 1049 | 1050 | let mut all_words = trie.get_all(); 1051 | all_words.sort(); 1052 | 1053 | assert_eq!(all_correct_words, all_words); 1054 | } 1055 | 1056 | #[test] 1057 | fn remove_prefix_1() { 1058 | let mut trie = Trie::new(); 1059 | 1060 | trie.insert("eat"); 1061 | trie.insert("eating"); 1062 | trie.insert("eats"); 1063 | trie.insert("eatings"); 1064 | trie.insert("ea"); 1065 | 1066 | trie.remove_prefix("ea"); 1067 | 1068 | assert_eq!(vec![String::from("ea")], trie.get_all()); 1069 | assert_eq!(1, trie.len()); 1070 | } 1071 | 1072 | #[test] 1073 | fn remove_prefix_2() { 1074 | let mut trie = Trie::new(); 1075 | 1076 | trie.insert("a1"); 1077 | trie.insert("b2"); 1078 | trie.insert("c3"); 1079 | 1080 | trie.remove_prefix(""); 1081 | 1082 | assert_eq!(Vec::<String>::new(), trie.get_all()); 1083 | assert!(trie.is_empty()); 1084 | assert_eq!(0, trie.len()); 1085 | } 1086 | 1087 | #[test] 1088 | fn equals() { 1089 | let mut trie_1 = Trie::new(); 1090 | trie_1.insert("test"); 1091 | 1092 | let mut trie_2 = Trie::new(); 1093 | 
trie_2.insert("test"); 1094 | 1095 | assert_eq!(trie_1, trie_2); 1096 | } 1097 | 1098 | #[test] 1099 | fn add_two_tries_1() { 1100 | let mut t1 = Trie::new(); 1101 | t1.insert("word1"); 1102 | t1.insert("word2"); 1103 | t1.insert("apple"); 1104 | t1.insert("banana"); 1105 | 1106 | let mut t2 = Trie::new(); 1107 | t2.insert("word3"); 1108 | t2.insert("word4"); 1109 | t2.insert("potato"); 1110 | t2.insert("pineapple"); 1111 | 1112 | let t3 = t1 + t2; 1113 | 1114 | let mut correct = Trie::new(); 1115 | correct.insert("word1"); 1116 | correct.insert("word2"); 1117 | correct.insert("apple"); 1118 | correct.insert("banana"); 1119 | correct.insert("word3"); 1120 | correct.insert("word4"); 1121 | correct.insert("potato"); 1122 | correct.insert("pineapple"); 1123 | 1124 | let mut t3_words = t3.get_all(); 1125 | let mut correct_words = correct.get_all(); 1126 | 1127 | t3_words.sort(); 1128 | correct_words.sort(); 1129 | assert_eq!(t3_words, correct_words); 1130 | } 1131 | 1132 | #[test] 1133 | fn add_two_tries_2() { 1134 | let mut t1 = Trie::new(); 1135 | t1.insert("word1"); 1136 | t1.insert("word2"); 1137 | t1.insert("apple"); 1138 | t1.insert("banana"); 1139 | 1140 | let mut t2 = Trie::new(); 1141 | t2.insert("word3"); 1142 | t2.insert("word4"); 1143 | t2.insert("potato"); 1144 | t2.insert("watermelon"); 1145 | 1146 | t1 += t2; 1147 | 1148 | let mut correct = Trie::new(); 1149 | correct.insert("word1"); 1150 | correct.insert("word2"); 1151 | correct.insert("apple"); 1152 | correct.insert("banana"); 1153 | correct.insert("word3"); 1154 | correct.insert("word4"); 1155 | correct.insert("potato"); 1156 | correct.insert("watermelon"); 1157 | 1158 | let mut t1_words = t1.get_all(); 1159 | let mut correct_words = correct.get_all(); 1160 | 1161 | t1_words.sort(); 1162 | correct_words.sort(); 1163 | assert_eq!(t1_words, correct_words); 1164 | } 1165 | 1166 | #[test] 1167 | fn add_two_tries_3() { 1168 | let mut t1 = Trie::new(); 1169 | t1.insert("word1"); 1170 | 1171 | let mut t2 = 
Trie::new(); 1172 | t2.insert("word2"); 1173 | t2.insert("word"); 1174 | 1175 | t1 += t2; 1176 | 1177 | let mut correct = Trie::new(); 1178 | correct.insert("word"); 1179 | correct.insert("word1"); 1180 | correct.insert("word2"); 1181 | 1182 | let mut t1_words = t1.get_all(); 1183 | let mut correct_words = correct.get_all(); 1184 | 1185 | t1_words.sort(); 1186 | correct_words.sort(); 1187 | assert_eq!(t1_words, correct_words); 1188 | } 1189 | } 1190 | --------------------------------------------------------------------------------