├── LARGE_SERIAL_FORM.txt
├── LARGE_TEST_DATA.txt
├── README.md
├── SERIAL_FORM.txt
├── TEST_DATA.txt
└── serializable_trie.cpp

/README.md:
--------------------------------------------------------------------------------
1 | # trieDS_serializable
2 | This is a library for the trie data structure that can be converted to/from a list of strings.
3 | A user can do the following:
4 |
5 | 1. Create a trie from a list of words, or create an empty trie.
6 | 2. Insert words into the trie.
7 | 3. Convert the trie to a list of words.
8 | 4. Check how many words in the trie have a given prefix.
9 | 5. Check whether a given word is present in the trie.
10 | 6. Convert the trie to a string, i.e. serialize the trie.
11 | 7. Create a trie from a string representing the trie.
12 |
13 | Note: Serialization of the trie to a string makes use of the special characters '>' and ']'. Users must not use these characters in words that are inserted into the trie.
///author : Kartik Arora
//
// serializable_trie.cpp
//
// A trie of words that can be converted to/from a list of words and to/from a
// compact string ("serial") form.
//
// Serial form (pre-order walk of the trie). Each node is written as:
//   - an optional ']' when a word ends at that node,
//   - for every child: the child's edge character followed by the child's
//     own encoding,
//   - a closing '>'.
// Because ']' and '>' are structural, words containing either character
// cannot survive a serialize/deserialize round trip and should not be
// inserted.
//
// Test data taken from https://github.com/dwyl/english-words
//
// Define TRIE_DS_NO_MAIN before including/compiling this file to leave out
// the demo driver at the bottom and use only the trie itself.
#include <cstddef>
#include <cstdio>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

using namespace std;

//This structure is used to represent a node of the trie.
//A node owns its children; copying a node copies the whole subtree below it.
struct trieNode
{
    //indicates if the prefix ending here is a valid word.
    bool isValidWord = false;
    //number of stored words that have the prefix represented by this node.
    int wordsWithPrefix = 0;
    //all child nodes of this node, keyed by the next character.
    unordered_map<char, unique_ptr<trieNode>> children;

    trieNode() = default;

    //Deep copy: every descendant of `other` is duplicated.
    trieNode(const trieNode &other)
        : isValidWord(other.isValidWord),
          wordsWithPrefix(other.wordsWithPrefix)
    {
        children.reserve(other.children.size());
        for (const auto &entry : other.children)
        {
            children.emplace(entry.first,
                             unique_ptr<trieNode>(new trieNode(*entry.second)));
        }
    }

    //Deep copy assignment. The copy is built first so that assigning from a
    //descendant of *this stays safe.
    trieNode &operator=(const trieNode &other)
    {
        if (this != &other)
        {
            trieNode copy(other);
            *this = std::move(copy);
        }
        return *this;
    }

    trieNode(trieNode &&) = default;
    trieNode &operator=(trieNode &&) = default;
};

//This class represents a generic trie data structure.
//The trie behaves as a set of words: inserting a word that is already
//present leaves every count unchanged, so the counts always agree with
//trieToList() and with a trie rebuilt from trieToString().
class TrieDS
{
private:

    //root node; represents the empty prefix.
    trieNode root;

    //number of distinct words stored.
    int totalWords = 0;

    //helper: depth-first walk that appends every stored word below
    //`currentNode` to `trieToString`. `prefix` holds the characters on the
    //path from the root and is restored before returning.
    static void treeDepthTraversal(const trieNode &currentNode,
                                   vector<string> &trieToString,
                                   string &prefix)
    {
        if (currentNode.isValidWord)
        {
            trieToString.push_back(prefix);
        }
        for (const auto &child : currentNode.children)
        {
            prefix.push_back(child.first);
            treeDepthTraversal(*child.second, trieToString, prefix);
            prefix.pop_back();
        }
    }

    //helper: appends the serial form of the subtree rooted at `node`
    //to `serialTrie` (see the format description at the top of the file).
    static void buildSerialTrie(const trieNode &node, string &serialTrie)
    {
        if (node.isValidWord)
            serialTrie.push_back(']');
        for (const auto &child : node.children)
        {
            serialTrie.push_back(child.first);
            buildSerialTrie(*child.second, serialTrie);
        }
        serialTrie.push_back('>');
    }

    //helper: rebuilds the subtree of `node` from `serialTrie`, starting at
    //index `read`, and leaves `read` just past the node's closing '>'.
    //Returns the number of words found in the subtree.
    //Throws invalid_argument when the text ends before the closing '>' or
    //when a node lists the same edge character twice.
    static int stringToTrie(trieNode &node, const string &serialTrie,
                            size_t &read)
    {
        int wordSeen = 0;
        node.isValidWord = false;
        if (read < serialTrie.size() && serialTrie[read] == ']')
        {
            node.isValidWord = true;
            ++wordSeen;
            ++read;
        }

        while (true)
        {
            //never index past the end of the input.
            if (read >= serialTrie.size())
            {
                throw invalid_argument(
                    "TrieDS: serialized trie ended before a closing '>'");
            }
            const char ch = serialTrie[read++];
            if (ch == '>')
                break;

            unique_ptr<trieNode> &slot = node.children[ch];
            if (slot)
            {
                //a second subtree for the same edge would be counted twice
                //and replace the first one.
                throw invalid_argument(
                    "TrieDS: serialized trie repeats an edge character");
            }
            slot.reset(new trieNode());
            wordSeen += stringToTrie(*slot, serialTrie, read);
        }

        node.wordsWithPrefix = wordSeen;
        return wordSeen;
    }

public:

    //used to construct an empty trie.
    TrieDS() = default;

    //Used to construct a trie initialized with a list of words.
    TrieDS(const vector<string> &words)
    {
        for (const string &word : words)
        {
            insertWord(word);
        }
    }

    //Used to construct a trie using an existing trie given in serial format.
    //Anything after the root's closing '>' is ignored.
    //Throws invalid_argument if `serialTrie` is empty, truncated, or repeats
    //an edge character within one node.
    TrieDS(const string &serialTrie)
    {
        size_t read = 0;
        totalWords = stringToTrie(root, serialTrie, read);
    }

    //used to insert a word in the trie.
    //Inserting a word that is already present changes nothing.
    void insertWord(const string &word)
    {
        //nodes on the path root..word; their counts are only bumped once we
        //know the word is new.
        vector<trieNode *> path;
        path.reserve(word.size() + 1);

        trieNode *currentNode = &root;
        path.push_back(currentNode);
        for (const char ch : word)
        {
            unique_ptr<trieNode> &child = currentNode->children[ch];
            if (!child)
                child.reset(new trieNode());
            currentNode = child.get();
            path.push_back(currentNode);
        }

        if (currentNode->isValidWord)
            return;

        currentNode->isValidWord = true;
        for (trieNode *node : path)
            ++node->wordsWithPrefix;
        ++totalWords;
    }

    //gives the total number of words currently present in the trie.
    int numberOfWordsInTrie() const
    {
        return totalWords;
    }

    //Used to convert the trie to a list of words.
    //The order of the words follows the hash-map iteration order of the
    //nodes and is therefore unspecified.
    vector<string> trieToList() const
    {
        vector<string> words;
        words.reserve(static_cast<size_t>(totalWords));
        string prefix;
        treeDepthTraversal(root, words, prefix);
        return words;
    }

    //check to see if a word is present in the trie.
    bool findWord(const string &word) const
    {
        return wordsWithThePrefix(word, 0) > 0;
    }

    //Number of words in the trie that have a given prefix.
    //calledBy != 0 (default): count every word starting with `prefix`.
    //calledBy == 0: return that count only when `prefix` itself is a stored
    //word, otherwise 0 (this is how findWord uses it).
    int wordsWithThePrefix(const string &prefix, int calledBy = 1) const
    {
        const trieNode *currentNode = &root;
        for (const char ch : prefix)
        {
            const auto it = currentNode->children.find(ch);
            if (it == currentNode->children.end())
                return 0;
            currentNode = it->second.get();
        }
        if (calledBy || currentNode->isValidWord)
            return currentNode->wordsWithPrefix;
        return 0;
    }

    //Converts the trie to its serial form (see the top of the file).
    //Sibling order follows hash-map iteration order, so two equal tries may
    //produce different strings; either string rebuilds the same trie.
    string trieToString() const
    {
        string serializedTrie;
        buildSerialTrie(root, serializedTrie);
        return serializedTrie;
    }

};

#ifndef TRIE_DS_NO_MAIN
int main()
{
    /*
     *THIS PROGRAM USES THE TEST DATA
     *TO BUILD A TRIE FOR THE GIVEN DATA
     *THEN IT CONVERTS THE TRIE INTO ITS
     *SERIALIZED FORM AND WRITES IT TO A FILE
     */
    if (freopen("TEST_DATA.txt", "r", stdin) == nullptr)
    {
        perror("TEST_DATA.txt");
        return 1;
    }
    if (freopen("SERIAL_FORM.txt", "w", stdout) == nullptr)
    {
        perror("SERIAL_FORM.txt");
        return 1;
    }

    vector<string> input;
    string word;

    //stop at the "#END#" sentinel, or at end of input if it is missing.
    while (cin >> word && word != "#END#")
    {
        input.push_back(word);
    }

    const TrieDS myTrie(input);
    cout << myTrie.trieToString();
    return 0;
}
#endif