├── LARGE_SERIAL_FORM.txt
├── LARGE_TEST_DATA.txt
├── README.md
├── SERIAL_FORM.txt
├── TEST_DATA.txt
└── serializable_trie.cpp
/README.md:
--------------------------------------------------------------------------------
1 | # trieDS_serializable
2 | This is a library for the trie data structure which can be converted to/from a list of strings.
3 | A user can do the following:
4 |
5 | 1. Create a trie using a list of words or an empty trie.
6 | 2. Insert words into the trie.
7 | 3. Convert the trie to a list of words.
8 | 4. Check how many words in the trie have a given prefix.
9 | 5. Check if a given word is present in the trie.
10 | 6. Convert the trie to a string i.e. serialize the trie.
11 | 7. Create a trie using a string representing the trie.
12 |
13 | Note: Serialization of the trie to a string uses the special characters '>' and ']'. Words inserted into the trie must not contain these characters.
14 | Test data taken from https://github.com/dwyl/english-words
15 |
16 | Written in C++.
17 |
--------------------------------------------------------------------------------
/serializable_trie.cpp:
--------------------------------------------------------------------------------
1 | ///author : Kartik Arora
#include <cstddef>
#include <cstdio>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
using namespace std;
4 |
//A single node of the trie. Each node owns its children, so destroying a
//node releases the whole subtree below it and copying a node deep-copies
//that subtree.
struct trieNode
{
    //true when the path from the root to this node spells an inserted word.
    bool isValidWord = false;
    //number of inserted words whose path passes through or ends at this node.
    int wordsWithPrefix = 0;
    //child nodes keyed by the next character of the word.
    std::unordered_map<char, std::unique_ptr<trieNode>> children;

    trieNode() = default;

    //Deep copy: every child subtree is cloned so the copy shares no nodes
    //with the original.
    trieNode(const trieNode& other)
        : isValidWord(other.isValidWord),
          wordsWithPrefix(other.wordsWithPrefix)
    {
        children.reserve(other.children.size());
        for (const auto& entry : other.children)
        {
            children.emplace(entry.first,
                             std::make_unique<trieNode>(*entry.second));
        }
    }

    trieNode& operator=(const trieNode& other)
    {
        if (this != &other)
        {
            //Clone first: `other` may live inside our own subtree, which the
            //move assignment below is about to destroy.
            trieNode copy(other);
            *this = std::move(copy);
        }
        return *this;
    }

    trieNode(trieNode&&) = default;
    trieNode& operator=(trieNode&&) = default;
    ~trieNode() = default;
};

//This class represents a generic trie data structure that can be converted
//to and from a list of words and to and from a compact string form.
//
//Serial form: each node is written as an optional ']' (the node ends a word),
//then for every child its character followed by the child's own serial form,
//then a closing '>'. Because of that, words containing '>' or ']' cannot be
//round-tripped through the string form.
class TrieDS
{
private:

    //root node; it represents the empty prefix and is held by value so it
    //always exists.
    trieNode root;

    //number of successful insertWord calls, or the number of words read
    //from a serial string.
    int totalWords = 0;

    //Appends every word stored at or below `node` to `out`.
    //`prefix` holds the characters on the path from the root to `node`; it is
    //extended and restored in place so no string is copied per level.
    static void treeDepthTraversal(const trieNode& node,
                                   std::string& prefix,
                                   std::vector<std::string>& out)
    {
        if (node.isValidWord)
        {
            out.push_back(prefix);
        }
        for (const auto& entry : node.children)
        {
            prefix.push_back(entry.first);
            treeDepthTraversal(*entry.second, prefix, out);
            prefix.pop_back();
        }
    }

    //Appends the serial form of the subtree rooted at `node` to `serialTrie`.
    static void buildSerialTrie(const trieNode& node, std::string& serialTrie)
    {
        if (node.isValidWord)
        {
            serialTrie.push_back(']');
        }
        for (const auto& entry : node.children)
        {
            serialTrie.push_back(entry.first);
            buildSerialTrie(*entry.second, serialTrie);
        }
        serialTrie.push_back('>');
    }

    //Parses the serial form of one node starting at serialTrie[read] into
    //`node`, advancing `read` past everything consumed.
    //Returns the number of words newly marked in this subtree.
    //Reaching the end of the string is treated as if the missing '>' were
    //present, so empty or truncated input never reads out of bounds.
    static int stringToTrie(trieNode& node, const std::string& serialTrie,
                            std::size_t& read)
    {
        const std::size_t size = serialTrie.size();
        int added = 0;

        if (read < size && serialTrie[read] == ']')
        {
            //Count the word only once even if malformed input names the
            //same node twice.
            if (!node.isValidWord)
            {
                node.isValidWord = true;
                ++added;
            }
            ++read;
        }

        while (read < size && serialTrie[read] != '>')
        {
            const char ch = serialTrie[read++];
            //Reuse an existing child instead of overwriting it, so a
            //repeated character merges into the subtree already built.
            std::unique_ptr<trieNode>& child = node.children[ch];
            if (!child)
            {
                child = std::make_unique<trieNode>();
            }
            added += stringToTrie(*child, serialTrie, read);
        }

        if (read < size)
        {
            ++read; //consume the closing '>'
        }
        node.wordsWithPrefix += added;
        return added;
    }

public:

    //used to construct an empty trie.
    TrieDS() = default;

    //Used to construct a trie initialized with a list of words.
    TrieDS(const std::vector<std::string>& words)
    {
        for (const std::string& word : words)
        {
            insertWord(word);
        }
    }

    //Used to construct a trie using an existing trie given in serial format
    //(see the class comment). Characters after the root's closing '>' are
    //ignored; an empty string yields an empty trie.
    TrieDS(const std::string& serialTrie)
    {
        std::size_t read = 0;
        totalWords = stringToTrie(root, serialTrie, read);
    }

    //used to insert a word in the trie.
    //Every call counts: inserting the same word twice increases the word
    //total and the prefix counts twice. The serial form does not record that
    //multiplicity.
    void insertWord(const std::string& word)
    {
        trieNode* currentNode = &root;
        for (const char ch : word)
        {
            ++currentNode->wordsWithPrefix;
            std::unique_ptr<trieNode>& child = currentNode->children[ch];
            if (!child)
            {
                child = std::make_unique<trieNode>();
            }
            currentNode = child.get();
        }
        ++currentNode->wordsWithPrefix;
        currentNode->isValidWord = true;
        ++totalWords;
    }

    //gives the total number of words currently present in the trie.
    int numberOfWordsInTrie() const
    {
        return totalWords;
    }

    //Used to convert the trie to a list of words.
    //The order of the returned words follows the hash-map iteration order of
    //the nodes and is therefore unspecified.
    std::vector<std::string> trieToList() const
    {
        std::vector<std::string> words;
        std::string prefix;
        treeDepthTraversal(root, prefix, words);
        return words;
    }

    //check to see if a word is present in the trie.
    bool findWord(const std::string& word) const
    {
        return wordsWithThePrefix(word, 0) > 0;
    }

    //Number of words in the trie that have a given prefix.
    //With calledBy != 0 (the default) the count stored at the prefix's node
    //is returned. With calledBy == 0 the count is returned only when the
    //prefix itself is a stored word, otherwise 0; findWord uses this mode.
    //Returns 0 when no stored word starts with `prefix`.
    int wordsWithThePrefix(const std::string& prefix, int calledBy = 1) const
    {
        const trieNode* currentNode = &root;
        for (const char ch : prefix)
        {
            const auto it = currentNode->children.find(ch);
            if (it == currentNode->children.end())
            {
                return 0;
            }
            currentNode = it->second.get();
        }
        if (calledBy || currentNode->isValidWord)
        {
            return currentNode->wordsWithPrefix;
        }
        return 0;
    }

    //Serializes the trie to its string form (see the class comment).
    //An empty trie serializes to ">".
    std::string trieToString() const
    {
        std::string serializedTrie;
        buildSerialTrie(root, serializedTrie);
        return serializedTrie;
    }

};
191 |
192 | int main()
193 | {
194 | /*
195 | *THIS PROGRAM USES THE TEST DATA
196 | *TO BUILD A TRIE FOR THE GIVEN DATA
197 | *THEN IT CONVERTS THE TRIE INTO ITS
198 | *SERIALIZED FORM AND WRITES IT TO A FILE
199 | */
200 | freopen ("TEST_DATA.txt", "r", stdin);
201 | freopen ("SERIAL_FORM.txt", "w", stdout);
202 |
203 | vector input;
204 | string word;
205 |
206 | cin>>word;
207 | while(word != "#END#")
208 | {
209 | input.push_back(word);
210 | cin>>word;
211 | }
212 |
213 | TrieDS *myTrie = new TrieDS(input);
214 | cout<trieToString();
215 | return 0;
216 | }
217 |
--------------------------------------------------------------------------------