├── .gitignore ├── .jshintrc ├── .npmignore ├── LICENSE.txt ├── README.md ├── gulpfile.js ├── lib └── regex-trie.js ├── package.json └── test ├── 00_init.js ├── 01_add.js ├── 02_trie_impl.js ├── 03_contains.js ├── 04_toregexp.js ├── 05_quotemeta.js └── 06-tostring.js /.gitignore: -------------------------------------------------------------------------------- 1 | *.un~* 2 | *.sw* 3 | node_modules/* 4 | .git* 5 | -------------------------------------------------------------------------------- /.jshintrc: -------------------------------------------------------------------------------- 1 | { 2 | "passfail" : false, 3 | "maxerr" : 100, 4 | 5 | "browser" : false, 6 | "node" : true, 7 | "rhino" : false, 8 | "couch" : false, 9 | 10 | "debug" : false, 11 | "devel" : true, 12 | 13 | "strict" : true, 14 | "globalstrict" : true, 15 | 16 | "asi" : false, 17 | "laxbreak" : false, 18 | "bitwise" : true, 19 | "boss" : false, // Tolerate assignments inside an if, for, & while 20 | "curly" : true, 21 | "eqeqeq" : true, 22 | "eqnull" : false, 23 | "evil" : false, 24 | "expr" : false, 25 | "forin" : false, 26 | "immed" : true, // Require immediate invokes to be wrapped within parens 27 | "latedef" : false, 28 | "loopfunc" : false, 29 | "noarg" : true, 30 | "regexp" : true, 31 | "regexdash" : false, 32 | "scripturl" : true, 33 | "shadow" : false, 34 | "supernew" : false, 35 | "undef" : true, 36 | 37 | "newcap" : true, // Require capitalization of all constructor functions e.g. `new F()`. 38 | "noempty" : true, // Prohibit use of empty blocks. 39 | "nonew" : true, // Prohibit use of constructors for side-effects. 40 | "nomen" : true, // Prohibit use of initial or trailing underbars in names. 41 | "onevar" : true, // Allow only one `var` statement per function. 42 | "plusplus" : true, // Prohibit use of `++` & `--`. 43 | "sub" : true, // Tolerate all forms of subscript notation besides dot notation e.g. `dict['key']` instead of `dict.key`. 44 | "trailing" : false, // Prohibit trailing whitespaces. 45 | "white" : true, // Check against strict whitespace and indentation rules. 46 | "indent" : 4 // Specify indentation spacing 47 | } 48 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | *.un~* 2 | *.sw* 3 | node_modules/* 4 | docs/* 5 | test/* 6 | .jshintrc 7 | .npmignore 8 | gulpfile.js 9 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Alex Elder 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RegexTrie 2 | Create a regular expression to match any of the phrases added to the trie (inspired by Dan Kogai's [Regexp::Trie](http://search.cpan.org/~dankogai/Regexp-Trie-0.02/lib/Regexp/Trie.pm) Perl module. 3 | 4 | ## Installation and Usage 5 | 6 | ### Node 7 | 8 | 1. `npm install regex-trie` 9 | 2. `require` and use (see the *Usage* section for more comprehensive usage instructions.) 10 | ```javascript 11 | var RegexTrie = require('regex-trie'), 12 | trie = new RegexTrie(), 13 | regex = trie.add('foo').add('bar').toRegExp(); 14 | ``` 15 | 16 | ### Browser 17 | 1. `npm install regex-trie` 18 | 2. create your application using `RegexTrie`: 19 | ```javascript 20 | // app.js 21 | var RegexTrie = require('regex-trie'), 22 | trie = new RegexTrie(), 23 | regex = trie.add('foo').add('bar').toRegExp(); 24 | 25 | console.log(regex); 26 | ``` 27 | 3. Use [browserfy](https://github.com/substack/node-browserify) to create the 28 | the browser-safe package, e.g.: `browserify app.js -o bundle.js`. 29 | 30 | ### Usage 31 | ```javascript 32 | var RegexTrie = require('regex-trie'); 33 | 34 | // Create a new RegexTrie instance 35 | var trie = new RegexTrie(); 36 | 37 | // Add phrases to the trie 38 | trie.add('foo') 39 | .add('bar') 40 | .add('baz'); 41 | 42 | // You can use an array to add phrases if you'd rather (duplicate 43 | // pharses are ignored.) 44 | trie.add(['foo', 'bar', 'baz']); 45 | 46 | // Fetch a RegExp to represent all the phrases in the trie 47 | var regex = trie.toRegExp(); // regex => /(?:foo|ba[rz])/ 48 | 49 | // What matches? 50 | var things_to_match = ['foo', 'bar', 'baz', 'bat', 'fun', 'food'], 51 | match_results = things_to_match.map(regex.test, regex); 52 | 53 | console.log(match_results); 54 | // => [ true, true, true, false, false, true ] 55 | ``` 56 | 57 | ## Methods 58 | 59 | ### `RegexTrie()` (constructor) 60 | 61 | Creates a new instance of `RegexTrie`. Currently doesn't accept any options 62 | however this will likely change as the module evolves. 63 | 64 | ### `.add(phrase_to_add)` 65 | 66 | Adds a new phrase to the trie. Accepts singleton arguments, or an array of 67 | phrases. Ignores any values which aren't literals (objects, bools, arrays, 68 | etc). 69 | 70 | ```javascript 71 | trie.add('foo') 72 | .add('bar') 73 | .add('baz') 74 | .add(['who', 'what', 'when', 'where']; 75 | ``` 76 | 77 | All numbers (except `NaN`) are coerced into strings before being added. 78 | 79 | Before adding new phrases, the trie is checked to see whether or not that 80 | phrase already exists (using `contains`). 81 | 82 | ### `.contains(phrase)` 83 | 84 | Will check to see if the trie contains a phrase which matches `phrase`, and 85 | return `true` or `false` if the phrase does or does not exist. 86 | 87 | ### `.toRegExp()` 88 | 89 | Returns a `RegExp` instance which should match each individual phrase in the 90 | tree. The trie will escape any character that matches: `/([^A-Za-z0-9_])/`. For 91 | example, if the following values are added, the pipe (OR) will be escaped: 92 | 93 | ```javascript 94 | trie.add(['foo', '|', 'bar'].toRegExp(); 95 | // => (?:foo|\||bar) 96 | ``` 97 | 98 | #### Regex Specific Details 99 | 100 | The `RegExp` returned by `regex()` is a non-capturing, un-anchored regular 101 | expression meaning it'll never capture its matches and all of the following 102 | phrases will still match: 103 | 104 | ```javascript 105 | var regex = trie.add(['foo', 'bar', 'car']).toRegExp(); 106 | 107 | ['fool', 'afool', 'bart', 'abart', 'acar', 'acard'].forEach( function (word) { 108 | console.log(regex.test(word)); 109 | }); 110 | // Output => true, true, true, true, true, true 111 | ``` 112 | 113 | ## Development 114 | 115 | `regex-trie` uses [Gulp](http://gulpjs.com/) as its build system. Currently 116 | `gulpfile` defines a few tasks: 117 | 118 | * `lint` -- `JSHint` (see `.jshintrc` for this project's settings) 119 | * `test` -- runs `mocha` from `gulp` 120 | * `docs` -- `yuidocjs` to produce development documentation 121 | * `watch` -- watches for changes to JS files in `./test/` and `./lib/` and runs the `lint` task 122 | * `default` -- by default the `watch` task runs (which runs `lint`) 123 | * `continuous` -- runs `watch` (which runs `lint`) and `test` on every JS file change. 124 | 125 | ### Development Dependencies 126 | 127 | Please see `package.json` for the latest development dependencies. At the time 128 | of writing, you'll need: 129 | 130 | ```javascript 131 | "mocha": "~1.17.1" 132 | "should": "~3.1.2" 133 | "gulp-jshint": "~1.4.0" 134 | "gulp-util": "~2.2.14" 135 | "gulp": "~3.5.2" 136 | "gulp-watch": "~0.5.0" 137 | "blanket": "~1.1.6" 138 | "gulp-yuidoc": "~0.1.0" 139 | ``` 140 | 141 | ## Testing 142 | 143 | The tests within `regex-trie` use [mocha](http://mochajs.org/) 144 | with [should.js](https://github.com/visionmedia/should.js/) assertions. To test 145 | the module, just run `mocha` from your terminal. 146 | 147 | ## TODO 148 | 149 | List of things to add aren't in any specific order. 150 | 151 | 1. Regex options to configure capturing and anchoring 152 | 2. Cache compiled trie branches (to speed up RegExp generation) 153 | 154 | ## License 155 | 156 | See `LICENSE.txt` for license rights and limitations (MIT). 157 | -------------------------------------------------------------------------------- /gulpfile.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | var gulp = require('gulp'), 4 | jshint = require('gulp-jshint'), 5 | watch = require('gulp-watch'), 6 | mocha = require('gulp-mocha'), 7 | yuidoc = require('gulp-yuidoc'); 8 | 9 | gulp.task('lint', function () { 10 | gulp.src(['lib/*.js', 'test/**/*.js']) 11 | .pipe(jshint()) 12 | .pipe(jshint.reporter('default')); 13 | }); 14 | 15 | gulp.task('test', function () { 16 | gulp.src('./test/*.js').pipe(mocha()); 17 | }); 18 | 19 | gulp.task('docs', function () { 20 | 21 | gulp.src('./lib/*.js') 22 | .pipe(yuidoc()) 23 | .pipe(gulp.dest('./docs')); 24 | }); 25 | 26 | gulp.task('watch', function () { 27 | gulp.watch(['lib/*.js', 'test/**/*.js'], function () { 28 | gulp.run('lint'); 29 | }); 30 | }); 31 | 32 | gulp.task('continuous', function () { 33 | 34 | gulp.watch(['lib/*.js', 'test/**/*.js'], function () { 35 | gulp.run(['lint', 'test']); 36 | }); 37 | }); 38 | 39 | gulp.task('default', ['watch']); 40 | -------------------------------------------------------------------------------- /lib/regex-trie.js: -------------------------------------------------------------------------------- 1 | var jsesc = require('jsesc'); 2 | 3 | /** 4 | * @module regex-trie 5 | */ 6 | var RegexTrie = (function () { 7 | 8 | "use strict"; 9 | 10 | /** 11 | * The `RegexTrie` class builds a regular expression from a set of phrases 12 | * added to it. It produces a non-optimised `RegExp` and only represents 13 | * literal characters; only alphanumeric or underscore ("_") characters are 14 | * left unescaped. 15 | * 16 | * @class RegexTrie 17 | * @constructor 18 | */ 19 | var RegexTrie = function () { 20 | 21 | if ( ! (this instanceof RegexTrie) ) { 22 | return new RegexTrie(); 23 | } 24 | 25 | this._num_phrases_in_trie = 0; 26 | this._trie = {}; 27 | 28 | return this; 29 | }; 30 | 31 | /** 32 | * 33 | * Phrases can be added to the trie using `add`. Elements can be wrapped in 34 | * an array before being added. Only alphanumeric values will be added. 35 | * Objects, booleans, arrays, etc will all be ignored (failed attempts to 36 | * add values are silent.) 37 | * 38 | * @method add() 39 | * @param phrase_to_add {array|string|number} 40 | * @chainable 41 | */ 42 | RegexTrie.prototype.add = function (phrase_to_add) { 43 | 44 | if ( phrase_to_add instanceof Array ) { 45 | phrase_to_add.forEach(this.add, this); 46 | } 47 | 48 | phrase_to_add = this._coerce_to_string(phrase_to_add); 49 | 50 | if ( ! this._is_phrase_valid(phrase_to_add) ) { 51 | return this; 52 | } 53 | 54 | // Has this phrase already been added? 55 | if ( this.contains(phrase_to_add) ) { 56 | return this; 57 | } 58 | 59 | var trie = this._trie; 60 | 61 | phrase_to_add.split('').forEach( function (chr) { 62 | 63 | if ( chr in trie ) { 64 | 65 | trie = trie[chr]; 66 | return; 67 | } 68 | 69 | trie[chr] = {}; 70 | trie = trie[chr]; 71 | }, this); 72 | 73 | // Set the end marker (so we know this was a complete word) 74 | trie.end = true; 75 | this._num_phrases_in_trie++; 76 | 77 | return this; 78 | }; 79 | 80 | RegexTrie.prototype.toRegExp = function () { 81 | 82 | if ( this._num_phrases_in_trie === 0 ) return; 83 | 84 | var result = this.toString(); 85 | return new RegExp(result); 86 | }; 87 | 88 | RegexTrie.prototype.toString = function () { 89 | 90 | if ( this._num_phrases_in_trie === 0 ) return; 91 | 92 | var _walk_trie = function (trie, this_arg) { 93 | 94 | var keys = Object.keys(trie), 95 | alt_group = [], 96 | char_class = [], 97 | end = false; // marks the end of a phrase 98 | 99 | keys.forEach( function (key) { 100 | 101 | var walk_result, insert; 102 | 103 | if ( key === 'end' ) { 104 | end = true; 105 | return; 106 | } 107 | 108 | walk_result = 109 | this._quotemeta(key) + _walk_trie(trie[key], this_arg); 110 | 111 | // When we have more than one key, `insert` references 112 | // the alternative regexp group, otherwise it points to 113 | // the char class group. 114 | insert = ( keys.length > 1 ) ? [].push.bind(alt_group) 115 | : [].push.bind(char_class); 116 | insert(walk_result); 117 | }, this_arg); 118 | 119 | return this_arg._to_regex(alt_group, char_class, end); 120 | }; 121 | 122 | var result = _walk_trie(this._trie, this); 123 | return result; 124 | }; 125 | 126 | RegexTrie.prototype._to_regex = function (alt_group, char_class, end) { 127 | 128 | var group_has_one_element = function (el) { 129 | return el.length === 1; 130 | }, 131 | result = ""; 132 | 133 | // Once we've finished walking through the tree we need to build 134 | // the regex match groups... 135 | if ( alt_group.length > 0 ) { 136 | 137 | if ( alt_group.length === 1 ) { 138 | // Individual elements are merged with the current result. 139 | result += alt_group[0]; 140 | } 141 | else if ( alt_group.every(group_has_one_element) ) { 142 | // When every single array in the alternative group is 143 | // a single element array, this gets flattened in to 144 | // a character class. 145 | result += ( '[' + alt_group.join('') + ']' ); 146 | } 147 | else { 148 | // Finally, build a non-capturing alternative group. 149 | result += ( '(?:' + alt_group.join('|') + ')' ); 150 | } 151 | } 152 | else if ( char_class.length > 0 ) { 153 | result += char_class[0]; 154 | } 155 | 156 | if ( end && result ) { 157 | 158 | if ( result.length === 1 ) { 159 | result += '?'; 160 | } 161 | else { 162 | result = '(?:' + result + ')?'; 163 | } 164 | } 165 | 166 | return result; 167 | }; 168 | 169 | RegexTrie.prototype.contains = function (phrase_to_fetch) { 170 | 171 | if ( ! this._is_phrase_valid(phrase_to_fetch) && 172 | this._num_phrases_in_trie > 0 ) { 173 | return false; 174 | } 175 | 176 | var trie = this._trie; 177 | 178 | // Wrap the attempts to contains in a try/catch block; any non-existant 179 | // keys will cause an exception, which we treat as 'this value does not 180 | // exist'. 181 | try { 182 | 183 | phrase_to_fetch.split('').forEach( function (chr) { 184 | trie = trie[chr]; 185 | }); 186 | 187 | return ( trie.hasOwnProperty('end') && trie.end === true ); 188 | } 189 | catch (e) { 190 | // Fall through 191 | } 192 | 193 | return false; 194 | }; 195 | 196 | RegexTrie.prototype._coerce_to_string = function (phrase) { 197 | 198 | if ( typeof phrase === 'number' && ! isNaN(phrase) ) { 199 | phrase = phrase.toString(); 200 | } 201 | 202 | return phrase; 203 | }; 204 | 205 | RegexTrie.prototype._is_phrase_valid = function (phrase) { 206 | return ( typeof phrase === 'string' && phrase.length > 0 ); 207 | }; 208 | 209 | RegexTrie.prototype._quotemeta = function (phrase) { 210 | 211 | if ( ! this._is_phrase_valid(phrase) ) { 212 | return phrase; 213 | } 214 | 215 | return phrase 216 | .replace(/([\t\n\f\r\\\$\(\)\*\+\-\.\?\[\]\^\{\|\}])/g, '\\$1') 217 | .replace(/[^\x20-\x7E]/g, jsesc); 218 | }; 219 | 220 | return RegexTrie; 221 | })(); 222 | 223 | module.exports = RegexTrie; 224 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "regex-trie", 3 | "description": "Create a regular expression to match any of the phrases added to the trie (inspired by Dan Kogai's Regexp::Trie Perl module.)", 4 | "version": "1.0.4", 5 | "homepage": "https://github.com/alexeld/regex-trie", 6 | "keywords": [ 7 | "regex", 8 | "regexp", 9 | "trie" 10 | ], 11 | "repository": { 12 | "type": "git", 13 | "url": "https://github.com/alexeld/regex-trie.git" 14 | }, 15 | "author": { 16 | "name": "Alex Elder", 17 | "email": "hello@handwritten.io", 18 | "url": "http://handwritten.io" 19 | }, 20 | "main": "./lib/regex-trie.js", 21 | "maintainers": [ 22 | { 23 | "name": "Alex Elder", 24 | "email": "hello@handwritten.io", 25 | "web": "http://www.handwritten.io" 26 | } 27 | ], 28 | "scripts": { 29 | "test": "mocha" 30 | }, 31 | "dependencies": { 32 | "jsesc": "^0.5.0" 33 | }, 34 | "devDependencies": { 35 | "mocha": "~1.17.1", 36 | "should": "~3.1.2", 37 | "gulp-jshint": "~1.4.0", 38 | "gulp-util": "~2.2.14", 39 | "gulp": "~3.5.2", 40 | "gulp-watch": "~0.5.0", 41 | "blanket": "~1.1.6", 42 | "gulp-yuidoc": "~0.1.0", 43 | "gulp-mocha": "~0.4.1" 44 | }, 45 | "config": { 46 | "blanket": { 47 | "pattern": "./lib/regex-trie.js" 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /test/00_init.js: -------------------------------------------------------------------------------- 1 | var assert = require('assert'), 2 | should = require('should'), 3 | RegexTrie = require('../lib/regex-trie.js'); 4 | 5 | describe('Provide a new-agnostic constructor', function () { 6 | 7 | it('should create the same instance without needing new', function () { 8 | var trie = RegexTrie(), 9 | trie2 = new RegexTrie(); 10 | 11 | trie.should.be.instanceof(RegexTrie); 12 | trie2.should.be.instanceof(RegexTrie); 13 | 14 | trie.should.eql(trie2); 15 | }); 16 | }); 17 | 18 | -------------------------------------------------------------------------------- /test/01_add.js: -------------------------------------------------------------------------------- 1 | var assert = require('assert'), 2 | should = require('should'), 3 | RegexTrie = require('../lib/regex-trie.js'); 4 | 5 | describe('#add()', function () { 6 | 7 | it('should add a single word and return itself', function () { 8 | 9 | var trie = new RegexTrie(), 10 | result = trie.add('alpha'); 11 | 12 | result.should.be.instanceof(RegexTrie); 13 | }); 14 | 15 | it('should count the number of words added to the trie', function () { 16 | 17 | var trie = new RegexTrie(); 18 | 19 | trie.add('alpha'); 20 | trie._num_phrases_in_trie.should.be.exactly(1); 21 | 22 | // Add multiple words 23 | trie.add('bravo') 24 | .add('charlie') 25 | .add('delta'); 26 | 27 | trie._num_phrases_in_trie.should.be.exactly(4); 28 | }); 29 | 30 | it('should add an array of strings', function () { 31 | 32 | var trie = new RegexTrie(); 33 | trie.add([ 34 | 'alpha', 35 | 'bravo', 36 | 'charlie', 37 | 'delta' 38 | ]); 39 | 40 | trie._num_phrases_in_trie.should.be.exactly(4); 41 | }); 42 | 43 | it('should not increment the count for an undefined value', function () { 44 | 45 | var trie = new RegexTrie(); 46 | 47 | trie.add() 48 | .add(undefined) 49 | .add(null) 50 | .add('') 51 | .add({}) 52 | .add([]) 53 | .add(this) 54 | .add(true) 55 | .add(false) 56 | .add(/foo|bar/); 57 | 58 | trie._num_phrases_in_trie.should.be.exactly(0); 59 | }); 60 | 61 | it('should coerce a number to a string before adding it', function () { 62 | 63 | var trie = new RegexTrie(); 64 | 65 | trie.add(42); 66 | trie._num_phrases_in_trie.should.be.exactly(1); 67 | 68 | trie.add(1337) 69 | .add(13) 70 | .add(37); 71 | 72 | trie._num_phrases_in_trie.should.be.exactly(4); 73 | 74 | trie.add("12") 75 | .add(34) 76 | .add(56) 77 | .add('78') 78 | .add(13.37) 79 | .add('3.14') 80 | .add(0x5a); 81 | 82 | trie._num_phrases_in_trie.should.be.exactly(11); 83 | }); 84 | 85 | it('should not add NaN', function () { 86 | 87 | var trie = new RegexTrie(); 88 | 89 | trie.add(NaN); 90 | trie._num_phrases_in_trie.should.be.exactly(0); 91 | }); 92 | 93 | it('should add an array of strings and numbers', function () { 94 | 95 | var trie = new RegexTrie(); 96 | 97 | trie.add([ 98 | 'alpha', 99 | 'bravo', 100 | 42, 101 | 1337, 102 | 0x6a 103 | ]); 104 | 105 | trie._num_phrases_in_trie.should.be.exactly(5); 106 | 107 | trie.add([ 108 | 'charlie', 109 | 'delta', 110 | {}, 111 | null, 112 | undefined, 113 | this, 114 | [], 115 | -1 116 | ]); 117 | 118 | trie._num_phrases_in_trie.should.be.exactly(8); 119 | }); 120 | 121 | it('should not add any elements from an object', function () { 122 | 123 | var trie = new RegexTrie(); 124 | 125 | trie.add({ 126 | alpha: 'alpha', 127 | bravo: 'bravo' 128 | }); 129 | 130 | trie._num_phrases_in_trie.should.be.exactly(0); 131 | 132 | trie.add([ 133 | { alpha: 'alpha' }, 134 | { bravo: 'bravo' } 135 | ]); 136 | 137 | trie._num_phrases_in_trie.should.be.exactly(0); 138 | }); 139 | }); 140 | -------------------------------------------------------------------------------- /test/02_trie_impl.js: -------------------------------------------------------------------------------- 1 | var assert = require('assert'), 2 | should = require('should'), 3 | RegexTrie = require('../lib/regex-trie.js'); 4 | 5 | describe('Trie implementation tests', function () { 6 | 7 | it('should be an empty object when the trie is empty', function () { 8 | 9 | var trie = new RegexTrie(); 10 | trie._trie.should.eql({}); 11 | }); 12 | 13 | it('should be an empty object when non-string values are added', function () { 14 | 15 | var trie = new RegexTrie(); 16 | 17 | trie.add(null) 18 | .add(undefined) 19 | .add({}) 20 | .add([]); 21 | 22 | trie._trie.should.eql({}); 23 | }); 24 | 25 | it('should add a "end = true" to the last char in a phrase', function () { 26 | 27 | var trie = new RegexTrie(); 28 | 29 | trie.add('foo')._trie.should.eql({ 30 | f: { 31 | o: { 32 | o: { 33 | end: true 34 | } 35 | } 36 | } 37 | }); 38 | }); 39 | }); 40 | -------------------------------------------------------------------------------- /test/03_contains.js: -------------------------------------------------------------------------------- 1 | var assert = require('assert'), 2 | should = require('should'), 3 | RegexTrie = require('../lib/regex-trie.js'); 4 | 5 | describe('#contains()', function () { 6 | 7 | it('should return undefined when no keys in trie', function () { 8 | 9 | var trie = new RegexTrie(); 10 | trie.contains('foo').should.equal(false); 11 | }); 12 | 13 | it('should return undefined when called with bad arguments', function () { 14 | 15 | var trie = new RegexTrie(), 16 | bad_inputs = [ 17 | [], 18 | {}, 19 | true, 20 | false, 21 | null, 22 | undefined, 23 | this, 24 | NaN, 25 | /foo|bar/ 26 | ]; 27 | 28 | bad_inputs.forEach( function (input) { 29 | trie.contains(input).should.equal(false); 30 | }); 31 | }); 32 | 33 | it('should return a value if entered', function () { 34 | 35 | var trie = new RegexTrie(); 36 | 37 | trie.add('alpha'); 38 | trie.contains('alpha').should.equal(true); 39 | }); 40 | 41 | it('should return false if the phrase does not exist', function () { 42 | 43 | var trie = new RegexTrie(); 44 | trie.contains('alpha').should.equal(false); 45 | }); 46 | 47 | it('should return a phrase when multiple exist', function () { 48 | 49 | var trie = new RegexTrie(), 50 | phrases = ['foo', 'bar', 'baz', 'fizz', 'buzz'], 51 | anti_phrases = ['fo', 'ba', 'fi', 'bu', 'alpha', 'bravo', 123]; 52 | 53 | // Add all the phrases, then check they all exist. 54 | phrases.forEach(trie.add, trie); 55 | phrases.every(trie.contains, trie); 56 | 57 | // None of these phrases should exist 58 | anti_phrases.forEach( function (anti_phrase) { 59 | trie.contains(anti_phrase).should.equal(false); 60 | }); 61 | }); 62 | }); 63 | -------------------------------------------------------------------------------- /test/04_toregexp.js: -------------------------------------------------------------------------------- 1 | var assert = require('assert'), 2 | should = require('should'), 3 | RegexTrie = require('../lib/regex-trie.js'); 4 | 5 | describe('#toRegExp()', function () { 6 | 7 | it('should return undefined if no phrases exist', function () { 8 | 9 | var trie = new RegexTrie(), 10 | regex = trie.toRegExp(); 11 | 12 | should.not.exist(regex); 13 | }); 14 | 15 | it('should return a RegExp object if phrases exist', function () { 16 | 17 | var trie = new RegexTrie(), 18 | regex = trie.add('foo').toRegExp(); 19 | 20 | regex.should.be.instanceof(RegExp); 21 | }); 22 | 23 | it('should return a regex for a single phrase', function () { 24 | 25 | var trie = new RegexTrie(), 26 | expected = new RegExp('abc'); 27 | 28 | trie.add('abc'); 29 | trie.toRegExp().should.eql(expected); 30 | }); 31 | 32 | it('should return a regex for two phrases', function () { 33 | 34 | var trie = new RegexTrie(), 35 | expected = new RegExp('(?:foo|bar)'); 36 | 37 | trie.add(['foo', 'bar']); 38 | trie.toRegExp().should.eql(expected); 39 | }); 40 | 41 | it('should return a regex for three phrases', function () { 42 | 43 | var trie = new RegexTrie(), 44 | expected = new RegExp('(?:foo|bar|car)'); 45 | 46 | trie.add(['foo', 'bar', 'car']); 47 | trie.toRegExp().should.eql(expected); 48 | }); 49 | }); 50 | 51 | describe('#toRegExp() matching tests', function () { 52 | 53 | it('should do simple character classes for word stems', function () { 54 | 55 | var trie = new RegexTrie(), 56 | phrases = ['bar', 'baz'], 57 | do_not_match = ['ba', 'batman'], 58 | expected = new RegExp('ba[rz]'); 59 | 60 | trie.add(phrases); 61 | trie.toRegExp().should.eql(expected); 62 | phrases.should.match(expected); 63 | do_not_match.should.not.match(expected); 64 | }); 65 | 66 | it('should make simple character classes for multi word stems', function () { 67 | 68 | var trie = new RegexTrie(), 69 | phrases = ['bar', 'baz', 'foo', 'fox'], 70 | do_not_match = ['ba', 'batman', 'fo', 'foox'], 71 | expected = new RegExp('(?:ba[rz]|fo[ox])'); 72 | 73 | trie.add(phrases); 74 | trie.toRegExp().should.eql(expected); 75 | phrases.should.match(expected); 76 | do_not_match.should.not.match(expected); 77 | }); 78 | 79 | it('fixme', function () { 80 | 81 | var trie = new RegexTrie(), 82 | phrases = ['fooa', 'foob', 'fooc', 'food'], 83 | do_not_match = ['ba', 'batman', 'fo', 'foox'], 84 | expected = new RegExp('foo[abcd]'); 85 | 86 | trie.add(phrases); 87 | trie.toRegExp().should.eql(expected); 88 | phrases.should.match(expected); 89 | do_not_match.should.not.match(expected); 90 | }); 91 | 92 | it('it should respect single and multi-char phrases', function () { 93 | 94 | var trie = new RegexTrie(), 95 | phrases = ['f', 'fo', 'fox'], 96 | do_not_match = ['fa', 'ox'], 97 | expected = new RegExp('f(?:ox?)?'); 98 | 99 | trie.add(phrases); 100 | trie.toRegExp().should.eql(expected); 101 | phrases.should.match(expected); 102 | do_not_match.should.not.match(expected); 103 | }); 104 | 105 | it('should create an or group for two different phrases', function () { 106 | 107 | var trie = new RegexTrie(), 108 | phrases = ['foo', 'bar'], 109 | do_not_match = ['fo', 'ba'], 110 | expected = new RegExp('(?:foo|bar)'); 111 | 112 | trie.add(phrases); 113 | trie.toRegExp().should.eql(expected); 114 | phrases.should.match(expected); 115 | do_not_match.should.not.match(expected); 116 | }); 117 | 118 | it('should create many or groups for lots of phrases', function () { 119 | 120 | var trie = new RegexTrie(), 121 | phrases = ['foo', 'bar', 'car', 'dog', 'goal', 'hotel'], 122 | do_not_match = ['fo', 'ba', 'cat', 'snake', 'goat', 'hotal'], 123 | expected = new RegExp('(?:foo|bar|car|dog|goal|hotel)'); 124 | 125 | trie.add(phrases); 126 | trie.toRegExp().should.eql(expected); 127 | phrases.should.match(expected); 128 | do_not_match.should.not.match(expected); 129 | }); 130 | 131 | it('should handle one root with a few stems', function () { 132 | 133 | var trie = new RegexTrie(), 134 | phrases = ['foods', 'foo', 'food'], 135 | do_not_match = ['fod', 'foood', 'fds'], 136 | expected = new RegExp('foo(?:ds?)?'); 137 | 138 | trie.add(phrases); 139 | trie.toRegExp().should.eql(expected); 140 | phrases.should.match(expected); 141 | do_not_match.should.not.match(expected); 142 | }); 143 | 144 | it('should escape meta characters', function () { 145 | 146 | var trie = new RegexTrie(), 147 | phrases = ['foo|bar'], 148 | do_not_match = ['foo', 'bar'], 149 | expected = new RegExp('foo\\|bar'); 150 | 151 | 152 | trie.add(phrases); 153 | trie.toRegExp().should.eql(expected); 154 | phrases.should.match(expected); 155 | do_not_match.should.not.match(expected); 156 | }); 157 | 158 | it('should escape meta characters and not mangle regex', function () { 159 | 160 | var trie = new RegexTrie(), 161 | phrases = ['^(foo|bar]', 'car[a-z]zoo'], 162 | do_not_match = ['foo', 'bar', 'foo|bar', 'cargzoo'], 163 | expected = new RegExp('(?:\\^\\(foo\\|bar\\]|car\\[a\\-z\\]zoo)'); 164 | 165 | trie.add(phrases); 166 | trie.toRegExp().should.eql(expected); 167 | phrases.should.match(expected); 168 | do_not_match.should.not.match(expected); 169 | }); 170 | 171 | it('should correctly escape pipes added between phrases', function () { 172 | 173 | var trie = new RegexTrie(), 174 | phrases = ['foo', '|', 'bar'], 175 | expected = new RegExp('(?:foo|\\||bar)'); 176 | 177 | trie.add(phrases).toRegExp().should.eql(expected); 178 | }); 179 | }); 180 | 181 | describe('#toRegExp() options tests', function () { 182 | 183 | it('should do simple character classes for word stems', function () { 184 | 185 | var trie = new RegexTrie(), 186 | phrases = ['bar', 'baz'], 187 | do_not_match = ['ba', 'batman'], 188 | expected = new RegExp('ba[rz]'); 189 | 190 | trie.add(phrases); 191 | trie.toRegExp().should.eql(expected); 192 | phrases.should.match(expected); 193 | do_not_match.should.not.match(expected); 194 | }); 195 | 196 | }); 197 | -------------------------------------------------------------------------------- /test/05_quotemeta.js: -------------------------------------------------------------------------------- 1 | var assert = require('assert'), 2 | should = require('should'), 3 | RegexTrie = require('../lib/regex-trie.js'); 4 | 5 | describe('#_quotemeta()', function () { 6 | 7 | it('should leave ASCII word-chars unchanged', function () { 8 | 9 | var trie = new RegexTrie(), 10 | result = trie._quotemeta('foo'); 11 | 12 | result.should.eql('foo'); 13 | }); 14 | 15 | it('should return any non-string values', function () { 16 | 17 | var trie = new RegexTrie(), 18 | i = 0, 19 | objects = [ {}, [], null, undefined, this, true, false ], 20 | result; 21 | 22 | for ( ; i < objects.length; i++ ) { 23 | 24 | result = trie._quotemeta(objects[i]); 25 | 26 | if ( typeof result === 'undefined' || 27 | ( ! result && typeof result !== 'boolean' ) ) { 28 | 29 | should.not.exist(result); 30 | continue; 31 | } 32 | else { 33 | objects[i].should.eql(result); 34 | } 35 | } 36 | }); 37 | 38 | it('should escape non letter, phrase, or underscore chars', function () { 39 | 40 | var trie = new RegexTrie(), 41 | result = trie._quotemeta('^'); 42 | 43 | result.should.eql('\\^'); 44 | }); 45 | 46 | it('should escape non letter, phrase, or underscore chars', function () { 47 | 48 | var trie = new RegexTrie(), 49 | chars = '^%$#()[]/.,;|', 50 | result = trie._quotemeta(chars), 51 | expected = "\\^%\\$#\\(\\)\\[\\]/\\.,;\\|"; 52 | 53 | result.should.eql(expected); 54 | 55 | // Also test that this is a valid RegExp 56 | try { 57 | new RegExp(result); 58 | should.be.ok(); 59 | } 60 | catch (e) { } 61 | }); 62 | 63 | it('should escape meta chars, leaving non-meta chars alone', function () { 64 | 65 | var trie = new RegexTrie(), 66 | regex = trie.regex(), 67 | chars = '^foo|bar|farr$', 68 | result = trie._quotemeta(chars), 69 | expected = '\\^foo\\|bar\\|farr\\$'; 70 | 71 | result.should.eql(expected); 72 | 73 | try { 74 | new RegExp(result); 75 | should.be.ok(); 76 | } 77 | catch (e) { } 78 | }); 79 | 80 | it('should be able to cope with lots of brackets', function () { 81 | 82 | var trie = new RegexTrie(), 83 | chars = '[[[[[[[[[[]]]]]]]]]]((()))())(((()))))(()))', 84 | result = trie._quotemeta(chars), 85 | expected = chars 86 | .split('') 87 | .map( function (chr) { return '\\' + chr; }) 88 | .join(''); 89 | 90 | result.should.eql(expected); 91 | 92 | try { 93 | new RegExp(result); 94 | should.be.ok(); 95 | } 96 | catch (e) { } 97 | }); 98 | 99 | it('should escape simple regex chars', function () { 100 | 101 | var trie = new RegexTrie(), 102 | 103 | regex = trie.regex(), 104 | chars = 'foo|bar', 105 | result = trie._quotemeta(chars), 106 | expected = 'foo\\|bar'; 107 | 108 | result.should.eql(expected); 109 | }); 110 | 111 | it('should escape non-ASCII symbols', function () { 112 | 113 | var trie = new RegexTrie(), 114 | 115 | regex = trie.regex(), 116 | chars = 'foo\xA9bar', 117 | result = trie._quotemeta(chars), 118 | expected = 'foo\\xA9bar'; 119 | 120 | result.should.eql(expected); 121 | }); 122 | 123 | it('should escape astral characters', function () { 124 | 125 | var trie = new RegexTrie(), 126 | 127 | regex = trie.regex(), 128 | chars = 'foo\uD834\uDF06bar', 129 | result = trie._quotemeta(chars), 130 | expected = 'foo\\uD834\\uDF06bar'; 131 | 132 | result.should.eql(expected); 133 | }); 134 | 135 | }); 136 | -------------------------------------------------------------------------------- /test/06-tostring.js: -------------------------------------------------------------------------------- 1 | var assert = require('assert'), 2 | should = require('should'), 3 | RegexTrie = require('../lib/regex-trie.js'); 4 | 5 | describe('#toString()', function () { 6 | 7 | it('should return undefined if no phrases exist', function () { 8 | 9 | var trie = new RegexTrie(), 10 | regex = trie.toString(); 11 | 12 | should.not.exist(regex); 13 | }); 14 | 15 | it('should return a string representing a regex if phrases exist', function () { 16 | 17 | var trie = new RegexTrie(), 18 | regex = trie.add('foo').toString(); 19 | 20 | regex.should.be.instanceof(String); 21 | }); 22 | 23 | it('should return a string representing a regex for a single phrase', function () { 24 | 25 | var trie = new RegexTrie(), 26 | expected = 'abc'; 27 | 28 | trie.add('abc'); 29 | trie.toString().should.eql(expected); 30 | }); 31 | 32 | it('should return a string representing a regex for two phrases', function () { 33 | 34 | var trie = new RegexTrie(), 35 | expected = '(?:foo|bar)'; 36 | 37 | trie.add(['foo', 'bar']); 38 | trie.toString().should.eql(expected); 39 | }); 40 | 41 | it('should return a regex for three phrases', function () { 42 | 43 | var trie = new RegexTrie(), 44 | expected = '(?:foo|bar|car)'; 45 | 46 | trie.add(['foo', 'bar', 'car']); 47 | trie.toString().should.eql(expected); 48 | }); 49 | }); 50 | 51 | describe('#toString() matching tests', function () { 52 | 53 | it('should do simple character classes for word stems', function () { 54 | 55 | var trie = new RegexTrie(), 56 | phrases = ['bar', 'baz'], 57 | expected = 'ba[rz]'; 58 | 59 | trie.add(phrases); 60 | trie.toString().should.eql(expected); 61 | }); 62 | 63 | it('should make simple character classes for multi word stems', function () { 64 | 65 | var trie = new RegexTrie(), 66 | phrases = ['bar', 'baz', 'foo', 'fox'], 67 | expected = '(?:ba[rz]|fo[ox])'; 68 | 69 | trie.add(phrases); 70 | trie.toString().should.eql(expected); 71 | }); 72 | 73 | it('fixme', function () { 74 | 75 | var trie = new RegexTrie(), 76 | phrases = ['fooa', 'foob', 'fooc', 'food'], 77 | expected = 'foo[abcd]'; 78 | 79 | trie.add(phrases); 80 | trie.toString().should.eql(expected); 81 | }); 82 | 83 | it('it should respect single and multi-char phrases', function () { 84 | 85 | var trie = new RegexTrie(), 86 | phrases = ['f', 'fo', 'fox'], 87 | expected = 'f(?:ox?)?'; 88 | 89 | trie.add(phrases); 90 | trie.toString().should.eql(expected); 91 | }); 92 | 93 | it('should create an or group for two different phrases', function () { 94 | 95 | var trie = new RegexTrie(), 96 | phrases = ['foo', 'bar'], 97 | expected = '(?:foo|bar)'; 98 | 99 | trie.add(phrases); 100 | trie.toString().should.eql(expected); 101 | }); 102 | 103 | it('should create many or groups for lots of phrases', function () { 104 | 105 | var trie = new RegexTrie(), 106 | phrases = ['foo', 'bar', 'car', 'dog', 'goal', 'hotel'], 107 | expected = '(?:foo|bar|car|dog|goal|hotel)'; 108 | 109 | trie.add(phrases); 110 | trie.toString().should.eql(expected); 111 | }); 112 | 113 | it('should handle one root with a few stems', function () { 114 | 115 | var trie = new RegexTrie(), 116 | phrases = ['foods', 'foo', 'food'], 117 | expected = 'foo(?:ds?)?'; 118 | 119 | trie.add(phrases); 120 | trie.toString().should.eql(expected); 121 | }); 122 | 123 | it('should escape meta characters', function () { 124 | 125 | var trie = new RegexTrie(), 126 | phrases = ['foo|bar'], 127 | expected = 'foo\\|bar'; 128 | 129 | 130 | trie.add(phrases); 131 | trie.toString().should.eql(expected); 132 | }); 133 | 134 | it('should escape meta characters and not mangle regex', function () { 135 | 136 | var trie = new RegexTrie(), 137 | phrases = ['^(foo|bar]', 'car[a-z]zoo'], 138 | expected = '(?:\\^\\(foo\\|bar\\]|car\\[a\\-z\\]zoo)'; 139 | 140 | trie.add(phrases); 141 | trie.toString().should.eql(expected); 142 | }); 143 | 144 | it('should correctly escape pipes added between phrases', function () { 145 | 146 | var trie = new RegexTrie(), 147 | phrases = ['foo', '|', 'bar'], 148 | expected = '(?:foo|\\||bar)'; 149 | 150 | trie.add(phrases).toString().should.eql(expected); 151 | }); 152 | }); 153 | 154 | describe('#toString() options tests', function () { 155 | 156 | it('should do simple character classes for word stems', function () { 157 | 158 | var trie = new RegexTrie(), 159 | phrases = ['bar', 'baz'], 160 | expected = 'ba[rz]'; 161 | 162 | trie.add(phrases); 163 | trie.toString().should.eql(expected); 164 | }); 165 | 166 | }); 167 | --------------------------------------------------------------------------------