/*
 * DidYouMean.js
 *
 * Takes a query and returns similar words from a dictionary of words that
 * you feed it. The dictionary is stored in a BK-tree whose edges are keyed
 * by Levenshtein distance; words within `maxDistance` edits of the query
 * (default: 2) are returned as suggestions.
 *
 * Example usage:
 *   var dym = new DidYouMean(['hello', 'goodbye']);
 *   var suggestions = dym.query('helo'); // this returns ['hello']
 */

/**
 * Computes the Levenshtein (edit) distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn one into the other.
 *
 * @param {string} string1 - First string.
 * @param {string} string2 - Second string.
 * @returns {number} The edit distance (0 when the strings are identical).
 */
function levenshteinDistance(string1, string2) {
  var length1 = string1.length;
  var length2 = string2.length;

  // Only the previous and the current row of the DP table are ever read,
  // so two rolling rows replace the full (length1 + 1) x (length2 + 1) matrix.
  var previousRow = [];
  var currentRow = [];
  var i;
  var j;
  var swap;

  // Row 0: turning the empty prefix into string2[0..j) costs j insertions.
  for (j = 0; j <= length2; j++) {
    previousRow[j] = j;
  }

  for (i = 1; i <= length1; i++) {
    // Column 0: turning string1[0..i) into the empty prefix costs i deletions.
    currentRow[0] = i;

    for (j = 1; j <= length2; j++) {
      if (string1[i - 1] === string2[j - 1]) {
        currentRow[j] = previousRow[j - 1];
      } else {
        currentRow[j] = Math.min(
          currentRow[j - 1],   // insertion
          previousRow[j],      // deletion
          previousRow[j - 1]   // substitution
        ) + 1;
      }
    }

    swap = previousRow;
    previousRow = currentRow;
    currentRow = swap;
  }

  // After the final swap the last computed row lives in previousRow.
  return previousRow[length2];
}

/**
 * Spelling suggester backed by a BK-tree.
 *
 * @param {string[]} words - Dictionary words. The first word becomes the
 *   tree root. An empty or missing list yields a suggester whose queries
 *   return no results.
 * @param {number} [maxDistance=2] - Largest edit distance at which a word is
 *   still suggested. An explicit 0 is honoured (exact matches only).
 */
function DidYouMean(words, maxDistance) {
  this.words = words || [];
  this.root = this.words.length > 0 ? new BKNode(this.words[0]) : null;
  // `maxDistance || 2` alone would silently replace an explicit 0 with 2.
  this.maxDistance = maxDistance === 0 ? 0 : (maxDistance || 2);

  this.construct();
}

DidYouMean.prototype = {
  /**
   * Inserts every word after the first into the tree rooted at `this.root`.
   * A child stored at index `d - 1` of a node is at edit distance `d` from
   * that node's word.
   */
  construct: function() {
    if (!this.root) {
      return;
    }

    for (var i = 1; i < this.words.length; i++) {
      var word = this.words[i];
      var currentNode = this.root;
      var distance = levenshteinDistance(word, currentNode.word);

      // Descend along the edge labelled with the current distance until
      // that slot is free, or until the word is found to be already present.
      while (distance !== 0 &&
             typeof currentNode.children[distance - 1] !== 'undefined') {
        currentNode = currentNode.children[distance - 1];
        distance = levenshteinDistance(word, currentNode.word);
      }

      // Distance 0 means the word is already in the tree; inserting it
      // would write to children[-1], a slot that queries never visit.
      if (distance !== 0) {
        currentNode.children[distance - 1] = new BKNode(word, distance);
      }
    }
  },

  /**
   * Returns all dictionary words within `this.maxDistance` edits of `term`.
   *
   * @param {string} term - The (possibly misspelled) query.
   * @returns {string[]} Matching words in tree pre-order; empty when the
   *   dictionary is empty or nothing is close enough.
   */
  query: function(term) {
    var results = [];

    if (this.root) {
      this.inspectNode(term, this.root, results);
    }

    return results;
  },

  /**
   * Recursively collects matches from `node` and its relevant children.
   *
   * @param {string} term - The query.
   * @param {BKNode} node - Sub-tree root to inspect.
   * @param {string[]} results - Accumulator that matching words are pushed to.
   */
  inspectNode: function(term, node, results) {
    if (!node) {
      return;
    }

    var distance = levenshteinDistance(term, node.word);

    if (distance <= this.maxDistance) {
      results.push(node.word);
    }

    // By the triangle inequality only children whose edge distance lies in
    // [distance - maxDistance, distance + maxDistance] can hold matches.
    var min = Math.max(1, distance - this.maxDistance);
    var max = distance + this.maxDistance;

    for (var i = min - 1; i <= max - 1; i++) {
      if (typeof node.children[i] !== 'undefined') {
        this.inspectNode(term, node.children[i], results);
      }
    }
  }
};

/**
 * A node of the BK-tree.
 *
 * @param {string} word - The dictionary word held by this node.
 * @param {number} [n] - Edit distance from this node's word to its parent's
 *   word; left undefined for the root.
 */
function BKNode(word, n) {
  this.word = word;
  this.n = n;
  this.children = [];
}
68 |
69 |
70 |

Words to search on:

71 | 72 |
73 | 74 |
75 |

Query:

76 | 77 | 78 |
79 |
80 | 81 |
82 | 83 |
84 |
85 | 88 | 91 | 92 |
93 |
94 |
95 | 96 | 97 | --------------------------------------------------------------------------------