├── license.txt ├── predict.js ├── test-tree.json ├── test.txt └── the-adventures-of-tom-sawyer.txt /license.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 James Rolfs 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
// 22 |
// --------------------------------------------------------------------------------
// /predict.js:
// --------------------------------------------------------------------------------
// (The four lines above, and the block comment at the very end, are framing text from the
// repository dump this script is embedded in; they are kept as comments so nothing is lost.)

/*jshint strict:true, undef:true, noarg:true, immed:true, trailing:true, expr:true, maxlen:120*/
/*global process: true, console:true, require:true, define:true, module:true*/

/**
 * T9-style predictive text lookup.
 *
 * Usage: node predict.js [dictionary] [sequence]
 *
 * The dictionary file is split into words, the words are folded into a letter
 * tree that records how often each word occurs, and the tree is then walked
 * with a keypad digit sequence to list the words that sequence can spell.
 *
 * The helpers are returned (and exported when `module` exists) so they can be
 * used without running the command line entry point.
 */
var predict = (function () {
    'use strict';


    // ---------- Variables ----------

    var usage = 'Usage: node predict.js [dictionary] [sequence]';

    // Keypad digit -> letters printed on that key. Digits that are absent
    // from this map (0 and 1) match no letter at all.
    var keyMap = {
        2: 'abc',
        3: 'def',
        4: 'ghi',
        5: 'jkl',
        6: 'mno',
        7: 'pqrs',
        8: 'tuv',
        9: 'wxyz'
    };


    // ---------- 'class'es ----------

    /**
     * A matched word together with the number of times it was seen in the
     * dictionary.
     *
     * @param {string} word
     * @param {number} occurrences
     */
    function Word(word, occurrences) {
        this.word = word;
        this.occurrences = occurrences;
    }

    Word.prototype.toString = function () {
        return this.word + ' (' + this.occurrences + ')';
    };


    // ---------- Helpers ----------

    /**
     * Renders a list of words one per line, each line terminated by '\n'.
     * Done with a local helper so that Array.prototype is left untouched.
     *
     * @param {Array} list - items exposing toString()
     * @returns {string}
     */
    function formatWords(list) {
        return list.map(function (word) {
            return word.toString() + '\n';
        }).join('');
    }

    /**
     * Tells whether a command line argument is a usable key sequence, i.e. a
     * non-empty string made of decimal digits only. The sequence is kept as a
     * string afterwards so that long inputs are not mangled by number
     * conversion.
     *
     * @param {*} value
     * @returns {boolean}
     */
    function isValidSequence(value) {
        return typeof value === 'string' && /^\d+$/.test(value);
    }

    /**
     * Splits raw dictionary text into words: strips punctuation, digits and
     * '$' (which is reserved as the end-of-word marker inside the tree),
     * treats '--' as a separator and splits on whitespace.
     *
     * @param {string} text
     * @returns {string[]} non-empty words, original casing preserved
     */
    function parseWords(text) {
        return String(text)
            .replace(/[:;!?",'\.\*\[\]\d\$]/g, '')
            .replace(/\-\-/g, ' ')
            .split(/\s+/g)
            .filter(function (word) {
                return word.length > 0;
            });
    }

    /**
     * Runs `task`, logging `label` before it and the elapsed time after it.
     *
     * @param {string} label
     * @param {function} task
     * @returns {*} whatever `task` returns
     */
    function timed(label, task) {
        var started = new Date().getTime();
        console.log(label);
        var result = task();
        console.log('Done. [' + (new Date().getTime() - started).toString() + 'ms]');
        return result;
    }


    // ---------- Build tree from dictionary file ----------

    /**
     * Folds a word list into a letter tree.
     *
     * Every node is a plain object keyed by lower-cased letter. A word that
     * ends on a letter with no children is stored as a number (its occurrence
     * count); a word that ends on a letter that also has children stores its
     * count under the '$' key of that child node.
     *
     * @param {string[]} wordList
     * @returns {Object} root node of the tree
     */
    function buildTree(wordList) {
        var tree = {};

        (wordList || []).forEach(function (word) {
            var letters = word.split('');
            var node = tree;

            for (var i = 0; i < letters.length; i++) {
                var letter = letters[i].toLowerCase();
                var existing = node[letter];
                var last = (i === letters.length - 1);

                if (last) {
                    if (typeof(existing) === 'undefined') {
                        // First sighting of a word ending here
                        node[letter] = 1;
                    } else if (typeof(existing) === 'number') {
                        // Duplicate of a childless word
                        node[letter] = existing + 1;
                    } else {
                        // Word ends on a node that already has children
                        existing.$ = (existing.$ || 0) + 1;
                    }
                } else {
                    if (typeof(existing) === 'undefined') {
                        existing = node[letter] = {};
                    } else if (typeof(existing) === 'number') {
                        // A shorter word ended here; keep its count under '$'
                        existing = node[letter] = { $: existing };
                    }
                    node = existing;
                }
            }
        });

        return tree;
    }


    // ---------- Traverse tree with sequence ----------

    /**
     * Collects the words of `tree` that the digit `sequence` can spell.
     *
     * With `exact` truthy only words as long as the sequence are returned;
     * otherwise every word that starts with a spelling of the sequence is
     * returned. Matches are appended to `words` (shared across the recursive
     * calls) in tree traversal order.
     *
     * @param {string|number} sequence - keypad digits
     * @param {Object} tree - node to search from (as built by buildTree)
     * @param {boolean} [exact]
     * @param {Word[]} [words] - accumulator, created when omitted
     * @param {string} [currentWord] - letters consumed on the way to `tree`
     * @param {number} [depth] - number of letters consumed so far
     * @returns {Word[]} the accumulator
     */
    function findWords(sequence, tree, exact, words, currentWord, depth) {
        var digits = String(sequence);

        words = words || [];
        currentWord = currentWord || '';
        depth = depth || 0;

        // Digit that the next letter has to satisfy; '' once the whole
        // sequence has been consumed.
        var key = digits.charAt(depth);

        Object.keys(tree).forEach(function (leaf) {
            var value = tree[leaf];

            // '$' holds the count of the word spelled by the path so far
            if (leaf === '$') {
                if (depth >= digits.length) {
                    words.push(new Word(currentWord, value));
                }
                return;
            }

            var matches;
            if (key) {
                matches = Object.prototype.hasOwnProperty.call(keyMap, key) && keyMap[key].indexOf(leaf) > -1;
            } else {
                // Past the end of the sequence: any letter continues a prefix
                // match, none continues an exact one.
                matches = !exact;
            }
            if (!matches) {
                return;
            }

            var word = currentWord + leaf;

            if (typeof(value) === 'number') {
                // Childless word ending on this letter; it has depth + 1
                // letters, so it qualifies once it covers the whole sequence.
                if (depth + 1 >= digits.length) {
                    words.push(new Word(word, value));
                }
            } else if (value && typeof(value) === 'object') {
                findWords(digits, value, exact, words, word, depth + 1);
            }
        });

        return words;
    }


    // ---------- Sort matches by occurrences ----------

    /**
     * Sorts `words` in place, most frequent first.
     *
     * @param {Word[]} words
     * @param {*} [sequence] - unused, kept for signature compatibility
     * @returns {Word[]} the same array
     */
    function sortWords(words, sequence) {
        return words.sort(function (first, second) {
            return second.occurrences - first.occurrences;
        });
    }


    // ---------- Command line entry point ----------

    /**
     * Prints a titled list of matches, or `emptyMessage` when there are none.
     */
    function report(title, list, emptyMessage) {
        if (list.length > 0) {
            console.log(title);
            console.log('------------------------------');
            console.log(formatWords(list));
        } else {
            console.log(emptyMessage);
            console.log('------------------------------\n');
        }
    }

    /**
     * Validates the arguments, reads the dictionary and prints the matches.
     * Sets a non-zero exit code when the arguments are invalid or the
     * dictionary cannot be read.
     *
     * @param {string[]} argv - process.argv style array
     */
    function main(argv) {
        if (argv.length < 4) {
            console.log('Dictionary and number sequence required, ' + usage);
            process.exitCode = 1;
            return;
        }

        var sequence = argv[3];

        if (!isValidSequence(sequence)) {
            console.log('Sequence must be a valid number sequence');
            process.exitCode = 1;
            return;
        }

        // Loaded here rather than at the top so the helpers above stay usable
        // where `require` is not available.
        var fs = require('fs');

        var time = new Date().getTime();
        console.log('Reading dictionary file...');

        fs.readFile(argv[2], function (error, data) {
            if (error) {
                console.log(error + '\n');
                console.log('Error reading dictionary file, ' + usage);
                process.exitCode = 1;
                return;
            }

            console.log('Done. [' + (new Date().getTime() - time).toString() + 'ms]');

            var dictionary = timed('Parsing dictionary contents...', function () {
                return parseWords(data.toString());
            });
            var tree = timed('Building dictionary tree...', function () {
                return buildTree(dictionary);
            });
            var exactWords = timed('Finding exact matches...', function () {
                return findWords(sequence, tree, true);
            });
            var allWords = timed('Finding all matches...', function () {
                return findWords(sequence, tree);
            });
            exactWords = timed('Sorting exact matches...', function () {
                return sortWords(exactWords);
            });
            allWords = timed('Sorting all matches...', function () {
                return sortWords(allWords);
            });

            console.log('\n');

            report('Exact matches', exactWords, '* No exact matches :( *\n');
            report('All matches', allWords, '* No matches :\'( *\n');
        });
    }

    var api = {
        keyMap: keyMap,
        Word: Word,
        formatWords: formatWords,
        isValidSequence: isValidSequence,
        parseWords: parseWords,
        buildTree: buildTree,
        findWords: findWords,
        sortWords: sortWords,
        main: main
    };

    if (typeof module !== 'undefined' && module && module.exports) {
        module.exports = api;

        // Only act as a command line tool when executed directly
        if (typeof require !== 'undefined' && require.main === module) {
            main(process.argv);
        }
    }

    return api;
}());

/*
 * Framing text from the repository dump, kept so the start of the next file
 * in the archive is not lost:
 *
 * --------------------------------------------------------------------------------
 * /test-tree.json:
 * --------------------------------------------------------------------------------
 * 1 | { 2 | "b": { 3 | "a": { 4 | "r": { 5 | "$": 3, 6 | "t": { 7 | "$": 1, 8 | "e": { 9 | "n": { 10 | "d": {
 * 11 | "e": { 12 | "r": 1 13 | } 14 | } 15 | } 16 | } 17 | }, 18 | "s": 1 19 | }, 20 | "t": 1 21 | } 22 | },
 * 23 | "f": { 24 | "a": { 25 | "t": 1, 26 | "r": { 27 | "$": 3, 28 |
 */
"t": { 29 | "h": { 30 | "e": { 31 | "r": 1 32 | } 33 | } 34 | } 35 | } 36 | }, 37 | "o": { 38 | "o": 1, 39 | "r": { 40 | "$": 1, 41 | "t": { 42 | "$": 2, 43 | "s": 1 44 | } 45 | } 46 | } 47 | }, 48 | "a": { 49 | "t": { 50 | "$": 1, 51 | "t": { 52 | "i": { 53 | "c": 1 54 | } 55 | }, 56 | "o": { 57 | "m": 1 58 | } 59 | }, 60 | "r": { 61 | "t": 1 62 | } 63 | } 64 | } -------------------------------------------------------------------------------- /test.txt: -------------------------------------------------------------------------------- 1 | bar bar bar bart bat bars fat far far far foo for fort fort forts farther bartender at art attic atom --------------------------------------------------------------------------------