├── .gitignore
├── README.md
├── cli.js
├── examples
    └── lex.l
├── package.json
├── regexp-lexer.js
└── tests
    ├── all-tests.js
    └── regexplexer.js


/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 | 
3 | # Editor bak files
4 | *~
5 | *.bak
6 | *.orig
7 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # jison-lex
 2 | A lexical analyzer generator used by [jison](http://jison.org). It takes a lexical grammar definition (either in JSON or Bison's lexical grammar format) and outputs a JavaScript lexer.
 3 | 
 4 | ## install
 5 | npm install jison-lex -g
 6 | 
 7 | ## usage
 8 | ```
 9 | Usage: jison-lex [file] [options]
10 | 
11 | file     file containing a lexical grammar
12 | 
13 | Options:
14 |    -o FILE, --outfile FILE       Filename and base module name of the generated parser
15 |    -t TYPE, --module-type TYPE   The type of module to generate (commonjs, js)
16 |    --version                     print version and exit
17 | ```
18 | 
19 | ## programmatic usage
20 | 
21 | ```
22 | var JisonLex = require('jison-lex');
23 | 
24 | var grammar = {
25 |   rules: [
26 |     ["x", "return 'X';" ],
27 |     ["y", "return 'Y';" ],
28 |     ["$", "return 'EOF';" ]
29 |   ]
30 | };
31 | 
32 | // or load from a file
33 | // var grammar = fs.readFileSync('mylexer.l', 'utf8');
34 | 
35 | // generate source
36 | var lexerSource = JisonLex.generate(grammar);
37 | 
38 | // or create a lexer in memory
39 | var lexer = new JisonLex(grammar);
40 | lexer.setInput('xyxxy');
41 | lexer.lex();
42 | // => 'X'
43 | lexer.lex();
44 | // => 'Y'
45 | ```
46 | ## license
47 | MIT
48 | 


--------------------------------------------------------------------------------
/cli.js:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env node
 2 | 
 3 | var version = require('./package.json').version;
 4 | 
 5 | var path = require('path');
 6 | var fs = require('fs');
 7 | var lexParser = require('lex-parser');
 8 | var RegExpLexer = require('./regexp-lexer.js');
 9 | 
10 | // Command-line option definitions (nomnom). This is the parser object; arguments are only parsed by opts.parse() when run as a script.
11 | var opts = require("nomnom")
12 |   .script('jison-lex')
13 |   .option('file', {
14 |     flag: true,
15 |     position: 0, // first positional argument
16 |     help: 'file containing a lexical grammar'
17 |   })
18 |   .option('outfile', {
19 |     abbr: 'o',
20 |     metavar: 'FILE',
21 |     help: 'Filename and base module name of the generated parser'
22 |   })
23 |   .option('module-type', {
24 |     abbr: 't',
25 |     default: 'commonjs',
26 |     metavar: 'TYPE',
27 |     help: 'The type of module to generate (commonjs, js)'
28 |   })
29 |   .option('version', {
30 |     abbr: 'V',
31 |     flag: true,
32 |     help: 'print version and exit',
33 |     callback: function() {
34 |        return version; // version string read from package.json
35 |     }
36 |   });
37 | 
38 | exports.main = function (opts) {
39 |     if (opts.file) {
40 |         var raw = fs.readFileSync(path.normalize(opts.file), 'utf8'),
41 |             name = path.basename((opts.outfile||opts.file)).replace(/\..*$/g,'');
42 | 
43 |         fs.writeFileSync(opts.outfile||(name + '.js'), processGrammar(raw, name));
44 |     } else {
45 |         readin(function (raw) {
46 |             console.log(processGrammar(raw));
47 |         });
48 |     }
49 | };
50 | 
51 | function processGrammar (file, name) {
52 |     var grammar;
53 |     try {
54 |         grammar = lexParser.parse(file);
55 |     } catch (e) {
56 |         try {
57 |             grammar = JSON.parse(file);
58 |         } catch (e2) {
59 |             throw e;
60 |         }
61 |     }
62 | 
63 |     var settings = grammar.options || {};
64 |     if (!settings.moduleType) settings.moduleType = opts['module-type'];
65 |     if (!settings.moduleName && name) settings.moduleName = name.replace(/-\w/g, function (match){ return match.charAt(1).toUpperCase(); });
66 | 
67 |     grammar.options = settings;
68 | 
69 |     return RegExpLexer.generate(grammar);
70 | }
71 | 
72 | function readin (cb) {
73 |     var stdin = process.openStdin(),
74 |         data = '';
75 | 
76 |     stdin.setEncoding('utf8');
77 |     stdin.addListener('data', function (chunk) {
78 |         data += chunk;
79 |     });
80 |     stdin.addListener('end', function () {
81 |         cb(data);
82 |     });
83 | }
84 | 
85 | if (require.main === module)
86 |     exports.main(opts.parse());
87 | 


--------------------------------------------------------------------------------
/examples/lex.l:
--------------------------------------------------------------------------------
 1 | /* Example lexical grammar: tokenizes a lex-style grammar file (macros, %s/%x start conditions, %% sections, rule patterns and actions). */
 2 | NAME              [a-zA-Z_][a-zA-Z0-9_-]*
 3 | BR                \r\n|\n|\r
 4 | 
 5 | %s indented trail rules
 6 | %x code start_condition options conditions action
 7 | 
 8 | %%
 9 | 
10 | <action>"/*"(.|\n|\r)*?"*/"           return 'ACTION_BODY';
11 | <action>"//".*                        return 'ACTION_BODY';
12 | <action>"/"[^ /]*?['"{}'][^ ]*?"/"    return 'ACTION_BODY'; // regexp with braces or quotes (and no spaces)
13 | <action>\"("\\\\"|'\"'|[^"])*\"       return 'ACTION_BODY';
14 | <action>"'"("\\\\"|"\'"|[^'])*"'"     return 'ACTION_BODY';
15 | <action>[/"'][^{}/"']+                return 'ACTION_BODY';
16 | <action>[^{}/"']+                     return 'ACTION_BODY';
17 | <action>"{"                           yy.depth++; return '{'
18 | <action>"}"                           yy.depth == 0 ? this.begin('trail') : yy.depth--; return '}'
19 | 
20 | <conditions>{NAME}                    return 'NAME';
21 | <conditions>">"                       this.popState(); return '>';
22 | <conditions>","                       return ',';
23 | <conditions>"*"                       return '*';
24 | 
25 | <rules>{BR}+                          /* skip blank lines */
26 | <rules>\s+{BR}+                       /* skip trailing whitespace followed by line breaks */
27 | <rules>\s+                            this.begin('indented')
28 | <rules>"%%"                           this.begin('code'); return '%%'
29 | <rules>[a-zA-Z0-9_]+                  return 'CHARACTER_LIT'
30 | 
31 | <options>{NAME}                       yy.options[yytext] = true
32 | <options>{BR}+                        this.begin('INITIAL')
33 | <options>\s+{BR}+                     this.begin('INITIAL')
34 | <options>\s+                          /* skip whitespace between option names */
35 | 
36 | <start_condition>{NAME}               return 'START_COND'
37 | <start_condition>{BR}+                this.begin('INITIAL')
38 | <start_condition>\s+{BR}+             this.begin('INITIAL')
39 | <start_condition>\s+                  /* skip whitespace between condition names */
40 | 
41 | <trail>.*{BR}+                        this.begin('rules')
42 | 
43 | <indented>"{"                         yy.depth = 0; this.begin('action'); return '{'
44 | <indented>"%{"(.|{BR})*?"%}"          this.begin('trail'); yytext = yytext.substr(2, yytext.length-4);return 'ACTION'
45 | "%{"(.|{BR})*?"%}"                    yytext = yytext.substr(2, yytext.length-4); return 'ACTION'
46 | <indented>.+                          this.begin('rules'); return 'ACTION'
47 | 
48 | "/*"(.|\n|\r)*?"*/"             /* ignore block comments */
49 | "//".*                          /* ignore line comments */
50 | 
51 | {BR}+                           /* skip line breaks */
52 | \s+                             /* skip whitespace */
53 | {NAME}                          return 'NAME';
54 | \"("\\\\"|'\"'|[^"])*\"         yytext = yytext.replace(/\\"/g,'"'); return 'STRING_LIT';
55 | "'"("\\\\"|"\'"|[^'])*"'"       yytext = yytext.replace(/\\'/g,"'"); return 'STRING_LIT';
56 | "|"                             return '|';
57 | "["("\\\\"|"\]"|[^\]])*"]"      return 'ANY_GROUP_REGEX';
58 | "(?:"                           return 'SPECIAL_GROUP';
59 | "(?="                           return 'SPECIAL_GROUP';
60 | "(?!"                           return 'SPECIAL_GROUP';
61 | "("                             return '(';
62 | ")"                             return ')';
63 | "+"                             return '+';
64 | "*"                             return '*';
65 | "?"                             return '?';
66 | "^"                             return '^';
67 | ","                             return ',';
68 | "<<EOF>>"                       return '$';
69 | "<"                             this.begin('conditions'); return '<';
70 | "/!"                            return '/!';
71 | "/"                             return '/';
72 | "\\"([0-7]{1,3}|[rfntvsSbBwWdD\\*+()${}|[\]\/.^?]|"c"[A-Z]|"x"[0-9A-F]{2}|"u"[a-fA-F0-9]{4})      return 'ESCAPE_CHAR';
73 | "\\".                           yytext = yytext.replace(/^\\/g,''); return 'ESCAPE_CHAR';
74 | "$"                             return '$';
75 | "."                             return '.';
76 | "%options"                      yy.options = {}; this.begin('options');
77 | "%s"                            this.begin('start_condition'); return 'START_INC';
78 | "%x"                            this.begin('start_condition'); return 'START_EXC';
79 | "%%"                            this.begin('rules'); return '%%';
80 | "{"\d+(","\s?\d+|",")?"}"       return 'RANGE_REGEX';
81 | "{"{NAME}"}"                    return 'NAME_BRACE';
82 | "{"                             return '{';
83 | "}"                             return '}';
84 | .                               /* ignore bad characters */
85 | <*><<EOF>>                      return 'EOF';
86 | 
87 | <code>(.|{BR})+                 return 'CODE';
88 | 
89 | %%
90 | 
91 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "author": "Zach Carter <zach@carter.name> (http://zaa.ch)",
 3 |   "name": "jison-lex",
 4 |   "description": "lexical analyzer generator used by jison",
 5 |   "version": "0.3.4",
 6 |   "keywords": [
 7 |     "jison",
 8 |     "parser",
 9 |     "generator",
10 |     "lexer",
11 |     "flex",
12 |     "tokenizer"
13 |   ],
14 |   "repository": {
15 |     "type": "git",
16 |     "url": "git://github.com/zaach/jison-lex.git"
17 |   },
18 |   "bugs": {
19 |     "email": "jison@librelist.com",
20 |     "url": "http://github.com/zaach/jison-lex/issues"
21 |   },
22 |   "main": "regexp-lexer",
23 |   "bin": "cli.js",
24 |   "engines": {
25 |     "node": ">=0.4"
26 |   },
27 |   "dependencies": {
28 |     "lex-parser": "0.1.x",
29 |     "nomnom": "1.5.2"
30 |   },
31 |   "devDependencies": {
32 |     "test": "0.4.4"
33 |   },
34 |   "scripts": {
35 |     "test": "node tests/all-tests.js"
36 |   },
37 |   "directories": {
38 |     "lib": "lib",
39 |     "tests": "tests"
40 |   },
41 |   "homepage": "http://jison.org"
42 | }
43 | 


--------------------------------------------------------------------------------
/regexp-lexer.js:
--------------------------------------------------------------------------------
  1 | // Basic Lexer implemented using JavaScript regular expressions
  2 | // MIT Licensed
  3 | 
  4 | "use strict";
  5 | 
  6 | var lexParser = require('lex-parser');
  7 | var version = require('./package.json').version;
  8 | 
  9 | // Expand macros and convert matchers to RegExp's. Returns the matcher list, appends a `switch` with one `case` per rule to `actions`, and records each rule index in `startConditions`. NOTE: mutates `rules` in place.
 10 | function prepareRules(rules, macros, actions, tokens, startConditions, caseless) {
 11 |     var m,i,k,action,conditions,
 12 |         newRules = [];
 13 | 
 14 |     if (macros) {
 15 |         macros = prepareMacros(macros);
 16 |     }
 17 |     // replacement callback: `return 'TOKEN'` becomes `return <tokens[TOKEN]>` when that entry is truthy, else the quoted name is kept
 18 |     function tokenNumberReplacement (str, token) {
 19 |         return "return " + (tokens[token] || "'" + token + "'");
 20 |     }
 21 | 
 22 |     actions.push('switch($avoiding_name_collisions) {');
 23 | 
 24 |     for (i=0;i < rules.length; i++) {
 25 |         if (Object.prototype.toString.apply(rules[i][0]) !== '[object Array]') { // no leading condition list
 26 |             // implicit add to all inclusive start conditions
 27 |             for (k in startConditions) {
 28 |                 if (startConditions[k].inclusive) {
 29 |                     startConditions[k].rules.push(i);
 30 |                 }
 31 |             }
 32 |         } else if (rules[i][0][0] === '*') {
 33 |             // Add to ALL start conditions
 34 |             for (k in startConditions) {
 35 |                 startConditions[k].rules.push(i);
 36 |             }
 37 |             rules[i].shift(); // drop the ['*'] marker so the matcher becomes element 0
 38 |         } else {
 39 |             // Add to explicit start conditions
 40 |             conditions = rules[i].shift();
 41 |             for (k=0;k<conditions.length;k++) {
 42 |                 startConditions[conditions[k]].rules.push(i);
 43 |             }
 44 |         }
 45 |         // from here on rules[i] is [matcher, action]
 46 |         m = rules[i][0];
 47 |         if (typeof m === 'string') {
 48 |             for (k in macros) {
 49 |                 if (macros.hasOwnProperty(k)) {
 50 |                     m = m.split("{" + k + "}").join('(' + macros[k] + ')'); // literal {NAME} substitution, wrapped in a group
 51 |                 }
 52 |             }
 53 |             m = new RegExp("^(?:" + m + ")", caseless ? 'i':''); // anchored with ^; 'i' flag when caseless
 54 |         }
 55 |         newRules.push(m);
 56 |         if (typeof rules[i][1] === 'function') {
 57 |             rules[i][1] = String(rules[i][1]).replace(/^\s*function \(\)\s?\{/, '').replace(/\}\s*$/, ''); // keep only the function body text
 58 |         }
 59 |         action = rules[i][1];
 60 |         if (tokens && action.match(/return '[^']+'/)) {
 61 |             action = action.replace(/return '([^']+)'/g, tokenNumberReplacement);
 62 |         }
 63 |         actions.push('case ' + i + ':' + action + '\nbreak;'); // the case label is the rule index
 64 |     }
 65 |     actions.push("}");
 66 | 
 67 |     return newRules;
 68 | }
 69 | 
 70 | // expand macros within macros
 71 | function prepareMacros (macros) {
 72 |     var cont = true,
 73 |         m,i,k,mnew;
 74 |     while (cont) {
 75 |         cont = false;
 76 |         for (i in macros) if (macros.hasOwnProperty(i)) {
 77 |             m = macros[i];
 78 |             for (k in macros) if (macros.hasOwnProperty(k) && i !== k) {
 79 |                 mnew = m.split("{" + k + "}").join('(' + macros[k] + ')');
 80 |                 if (mnew !== m) {
 81 |                     cont = true;
 82 |                     macros[i] = mnew;
 83 |                 }
 84 |             }
 85 |         }
 86 |     }
 87 |     return macros;
 88 | }
 89 | 
 90 | function prepareStartConditions (conditions) {
 91 |     var sc,
 92 |         hash = {};
 93 |     for (sc in conditions) if (conditions.hasOwnProperty(sc)) {
 94 |         hash[sc] = {rules:[],inclusive:!!!conditions[sc]};
 95 |     }
 96 |     return hash;
 97 | }
 98 | 
 99 | function buildActions (dict, tokens) {
100 |     var actions = [dict.actionInclude || '', "var YYSTATE=YY_START;"];
101 |     var tok;
102 |     var toks = {};
103 | 
104 |     for (tok in tokens) {
105 |         toks[tokens[tok]] = tok;
106 |     }
107 | 
108 |     if (dict.options && dict.options.flex) {
109 |         dict.rules.push([".", "console.log(yytext);"]);
110 |     }
111 | 
112 |     this.rules = prepareRules(dict.rules, dict.macros, actions, tokens && toks, this.conditions, this.options["case-insensitive"]);
113 |     var fun = actions.join("\n");
114 |     "yytext yyleng yylineno yylloc".split(' ').forEach(function (yy) {
115 |         fun = fun.replace(new RegExp("\\b(" + yy + ")\\b", "g"), "yy_.$1");
116 |     });
117 | 
118 |     return "function anonymous(yy,yy_,$avoiding_name_collisions,YY_START) {" + fun + "\n}";
119 | }
120 | // Build a ready-to-use lexer object from grammar `dict`; a truthy `input` is passed to setInput(). NOTE: relies on direct eval of the generated source.
121 | function RegExpLexer (dict, input, tokens) {
122 |     var opts = processGrammar(dict, tokens);
123 |     var source = generateModuleBody(opts); // text of an object-literal expression "({ ... })"
124 |     var lexer = eval(source);
125 | 
126 |     lexer.yy = {};
127 |     if (input) {
128 |         lexer.setInput(input);
129 |     }
130 |     // source generators reuse the already-processed grammar held in opts
131 |     lexer.generate = function () { return generateFromOpts(opts); };
132 |     lexer.generateModule = function () { return generateModule(opts); };
133 |     lexer.generateCommonJSModule = function () { return generateCommonJSModule(opts); };
134 |     lexer.generateAMDModule = function () { return generateAMDModule(opts); };
135 | 
136 |     return lexer;
137 | }
138 | 
139 | RegExpLexer.prototype = {
140 |     EOF: 1,
141 |     parseError: function parseError(str, hash) {
142 |         if (this.yy.parser) {
143 |             this.yy.parser.parseError(str, hash);
144 |         } else {
145 |             throw new Error(str);
146 |         }
147 |     },
148 | 
149 |     // resets the lexer, sets new input
150 |     setInput: function (input, yy) {
151 |         this.yy = yy || this.yy || {};
152 |         this._input = input;
153 |         this._more = this._backtrack = this.done = false;
154 |         this.yylineno = this.yyleng = 0;
155 |         this.yytext = this.matched = this.match = '';
156 |         this.conditionStack = ['INITIAL'];
157 |         this.yylloc = {
158 |             first_line: 1,
159 |             first_column: 0,
160 |             last_line: 1,
161 |             last_column: 0
162 |         };
163 |         if (this.options.ranges) {
164 |             this.yylloc.range = [0,0];
165 |         }
166 |         this.offset = 0;
167 |         return this;
168 |     },
169 | 
170 |     // consumes and returns one char from the input
171 |     input: function () {
172 |         var ch = this._input[0];
173 |         this.yytext += ch;
174 |         this.yyleng++;
175 |         this.offset++;
176 |         this.match += ch;
177 |         this.matched += ch;
178 |         var lines = ch.match(/(?:\r\n?|\n).*/g);
179 |         if (lines) {
180 |             this.yylineno++;
181 |             this.yylloc.last_line++;
182 |         } else {
183 |             this.yylloc.last_column++;
184 |         }
185 |         if (this.options.ranges) {
186 |             this.yylloc.range[1]++;
187 |         }
188 | 
189 |         this._input = this._input.slice(1);
190 |         return ch;
191 |     },
192 | 
193 |     // unshifts one char (or a string) into the input
194 |     unput: function (ch) {
195 |         var len = ch.length;
196 |         var lines = ch.split(/(?:\r\n?|\n)/g);
197 | 
198 |         this._input = ch + this._input;
199 |         this.yytext = this.yytext.substr(0, this.yytext.length - len);
200 |         //this.yyleng -= len;
201 |         this.offset -= len;
202 |         var oldLines = this.match.split(/(?:\r\n?|\n)/g);
203 |         this.match = this.match.substr(0, this.match.length - 1);
204 |         this.matched = this.matched.substr(0, this.matched.length - 1);
205 | 
206 |         if (lines.length - 1) {
207 |             this.yylineno -= lines.length - 1;
208 |         }
209 |         var r = this.yylloc.range;
210 | 
211 |         this.yylloc = {
212 |             first_line: this.yylloc.first_line,
213 |             last_line: this.yylineno + 1,
214 |             first_column: this.yylloc.first_column,
215 |             last_column: lines ?
216 |                 (lines.length === oldLines.length ? this.yylloc.first_column : 0)
217 |                  + oldLines[oldLines.length - lines.length].length - lines[0].length :
218 |               this.yylloc.first_column - len
219 |         };
220 | 
221 |         if (this.options.ranges) {
222 |             this.yylloc.range = [r[0], r[0] + this.yyleng - len];
223 |         }
224 |         this.yyleng = this.yytext.length;
225 |         return this;
226 |     },
227 | 
228 |     // When called from action, caches matched text and appends it on next action
229 |     more: function () {
230 |         this._more = true;
231 |         return this;
232 |     },
233 | 
234 |     // When called from action, signals the lexer that this rule fails to match the input, so the next matching rule (regex) should be tested instead.
235 |     reject: function () {
236 |         if (this.options.backtrack_lexer) {
237 |             this._backtrack = true;
238 |         } else {
239 |             return this.parseError('Lexical error on line ' + (this.yylineno + 1) + '. You can only invoke reject() in the lexer when the lexer is of the backtracking persuasion (options.backtrack_lexer = true).\n' + this.showPosition(), {
240 |                 text: "",
241 |                 token: null,
242 |                 line: this.yylineno
243 |             });
244 | 
245 |         }
246 |         return this;
247 |     },
248 | 
249 |     // retain first n characters of the match
250 |     less: function (n) {
251 |         this.unput(this.match.slice(n));
252 |     },
253 | 
254 |     // displays already matched input, i.e. for error messages
255 |     pastInput: function () {
256 |         var past = this.matched.substr(0, this.matched.length - this.match.length);
257 |         return (past.length > 20 ? '...':'') + past.substr(-20).replace(/\n/g, "");
258 |     },
259 | 
260 |     // displays upcoming input, i.e. for error messages
261 |     upcomingInput: function () {
262 |         var next = this.match;
263 |         if (next.length < 20) {
264 |             next += this._input.substr(0, 20-next.length);
265 |         }
266 |         return (next.substr(0,20) + (next.length > 20 ? '...' : '')).replace(/\n/g, "");
267 |     },
268 | 
269 |     // displays the character position where the lexing error occurred, i.e. for error messages
270 |     showPosition: function () {
271 |         var pre = this.pastInput();
272 |         var c = new Array(pre.length + 1).join("-");
273 |         return pre + this.upcomingInput() + "\n" + c + "^";
274 |     },
275 | 
276 |     // test the lexed token: return FALSE when not a match, otherwise return token
277 |     test_match: function(match, indexed_rule) {
278 |         var token,
279 |             lines,
280 |             backup;
281 | 
282 |         if (this.options.backtrack_lexer) {
283 |             // save context
284 |             backup = {
285 |                 yylineno: this.yylineno,
286 |                 yylloc: {
287 |                     first_line: this.yylloc.first_line,
288 |                     last_line: this.last_line,
289 |                     first_column: this.yylloc.first_column,
290 |                     last_column: this.yylloc.last_column
291 |                 },
292 |                 yytext: this.yytext,
293 |                 match: this.match,
294 |                 matches: this.matches,
295 |                 matched: this.matched,
296 |                 yyleng: this.yyleng,
297 |                 offset: this.offset,
298 |                 _more: this._more,
299 |                 _input: this._input,
300 |                 yy: this.yy,
301 |                 conditionStack: this.conditionStack.slice(0),
302 |                 done: this.done
303 |             };
304 |             if (this.options.ranges) {
305 |                 backup.yylloc.range = this.yylloc.range.slice(0);
306 |             }
307 |         }
308 | 
309 |         lines = match[0].match(/(?:\r\n?|\n).*/g);
310 |         if (lines) {
311 |             this.yylineno += lines.length;
312 |         }
313 |         this.yylloc = {
314 |             first_line: this.yylloc.last_line,
315 |             last_line: this.yylineno + 1,
316 |             first_column: this.yylloc.last_column,
317 |             last_column: lines ?
318 |                          lines[lines.length - 1].length - lines[lines.length - 1].match(/\r?\n?/)[0].length :
319 |                          this.yylloc.last_column + match[0].length
320 |         };
321 |         this.yytext += match[0];
322 |         this.match += match[0];
323 |         this.matches = match;
324 |         this.yyleng = this.yytext.length;
325 |         if (this.options.ranges) {
326 |             this.yylloc.range = [this.offset, this.offset += this.yyleng];
327 |         }
328 |         this._more = false;
329 |         this._backtrack = false;
330 |         this._input = this._input.slice(match[0].length);
331 |         this.matched += match[0];
332 |         token = this.performAction.call(this, this.yy, this, indexed_rule, this.conditionStack[this.conditionStack.length - 1]);
333 |         if (this.done && this._input) {
334 |             this.done = false;
335 |         }
336 |         if (token) {
337 |             return token;
338 |         } else if (this._backtrack) {
339 |             // recover context
340 |             for (var k in backup) {
341 |                 this[k] = backup[k];
342 |             }
343 |             return false; // rule action called reject() implying the next rule should be tested instead.
344 |         }
345 |         return false;
346 |     },
347 | 
348 |     // return next match in input
349 |     next: function () {
350 |         if (this.done) {
351 |             return this.EOF;
352 |         }
353 |         if (!this._input) {
354 |             this.done = true;
355 |         }
356 | 
357 |         var token,
358 |             match,
359 |             tempMatch,
360 |             index;
361 |         if (!this._more) {
362 |             this.yytext = '';
363 |             this.match = '';
364 |         }
365 |         var rules = this._currentRules();
366 |         for (var i = 0; i < rules.length; i++) {
367 |             tempMatch = this._input.match(this.rules[rules[i]]);
368 |             if (tempMatch && (!match || tempMatch[0].length > match[0].length)) {
369 |                 match = tempMatch;
370 |                 index = i;
371 |                 if (this.options.backtrack_lexer) {
372 |                     token = this.test_match(tempMatch, rules[i]);
373 |                     if (token !== false) {
374 |                         return token;
375 |                     } else if (this._backtrack) {
376 |                         match = false;
377 |                         continue; // rule action called reject() implying a rule MISmatch.
378 |                     } else {
379 |                         // else: this is a lexer rule which consumes input without producing a token (e.g. whitespace)
380 |                         return false;
381 |                     }
382 |                 } else if (!this.options.flex) {
383 |                     break;
384 |                 }
385 |             }
386 |         }
387 |         if (match) {
388 |             token = this.test_match(match, rules[index]);
389 |             if (token !== false) {
390 |                 return token;
391 |             }
392 |             // else: this is a lexer rule which consumes input without producing a token (e.g. whitespace)
393 |             return false;
394 |         }
395 |         if (this._input === "") {
396 |             return this.EOF;
397 |         } else {
398 |             return this.parseError('Lexical error on line ' + (this.yylineno + 1) + '. Unrecognized text.\n' + this.showPosition(), {
399 |                 text: "",
400 |                 token: null,
401 |                 line: this.yylineno
402 |             });
403 |         }
404 |     },
405 | 
406 |     // return next match that has a token
407 |     lex: function lex () {
408 |         var r = this.next();
409 |         if (r) {
410 |             return r;
411 |         } else {
412 |             return this.lex();
413 |         }
414 |     },
415 | 
416 |     // activates a new lexer condition state (pushes the new lexer condition state onto the condition stack)
417 |     begin: function begin (condition) {
418 |         this.conditionStack.push(condition);
419 |     },
420 | 
421 |     // pop the previously active lexer condition state off the condition stack
422 |     popState: function popState () {
423 |         var n = this.conditionStack.length - 1;
424 |         if (n > 0) {
425 |             return this.conditionStack.pop();
426 |         } else {
427 |             return this.conditionStack[0];
428 |         }
429 |     },
430 | 
431 |     // produce the lexer rule set which is active for the currently active lexer condition state
432 |     _currentRules: function _currentRules () {
433 |         if (this.conditionStack.length && this.conditionStack[this.conditionStack.length - 1]) {
434 |             return this.conditions[this.conditionStack[this.conditionStack.length - 1]].rules;
435 |         } else {
436 |             return this.conditions["INITIAL"].rules;
437 |         }
438 |     },
439 | 
440 |     // return the currently active lexer condition state; when an index argument is provided it produces the N-th previous condition state, if available
441 |     topState: function topState (n) {
442 |         n = this.conditionStack.length - 1 - Math.abs(n || 0);
443 |         if (n >= 0) {
444 |             return this.conditionStack[n];
445 |         } else {
446 |             return "INITIAL";
447 |         }
448 |     },
449 | 
450 |     // alias for begin(condition)
451 |     pushState: function pushState (condition) {
452 |         this.begin(condition);
453 |     },
454 | 
455 |     // return the number of states pushed
456 |     stateStackSize: function stateStackSize() {
457 |         return this.conditionStack.length;
458 |     }
459 | };
460 | 
461 | 
462 | // generate lexer source from a grammar
463 | function generate (dict, tokens) {
464 |     var opt = processGrammar(dict, tokens);
465 | 
466 |     return generateFromOpts(opt);
467 | }
468 | 
469 | // process the grammar and build final data structures and functions
470 | function processGrammar(dict, tokens) {
471 |     var opts = {};
472 |     if (typeof dict === 'string') {
473 |         dict = lexParser.parse(dict);
474 |     }
475 |     dict = dict || {};
476 | 
477 |     opts.options = dict.options || {};
478 |     opts.moduleType = opts.options.moduleType;
479 |     opts.moduleName = opts.options.moduleName;
480 | 
481 |     opts.conditions = prepareStartConditions(dict.startConditions);
482 |     opts.conditions.INITIAL = {rules:[],inclusive:true};
483 | 
484 |     opts.performAction = buildActions.call(opts, dict, tokens);
485 |     opts.conditionStack = ['INITIAL'];
486 | 
487 |     opts.moduleInclude = (dict.moduleInclude || '').trim();
488 |     return opts;
489 | }
490 | 
491 | // Assemble the final source from the processed grammar
492 | function generateFromOpts (opt) {
493 |     var code = "";
494 | 
495 |     if (opt.moduleType === 'commonjs') {
496 |         code = generateCommonJSModule(opt);
497 |     } else if (opt.moduleType === 'amd') {
498 |         code = generateAMDModule(opt);
499 |     } else {
500 |         code = generateModule(opt);
501 |     }
502 | 
503 |     return code;
504 | }
505 | 
/**
 * Build the source text of the object literal that makes up a generated lexer.
 *
 * Every own enumerable member of `RegExpLexer.prototype` whose name does not
 * contain "generate" is copied into the literal as `name:<stringified value>`,
 * preceded by a `//` comment when a description for it is known. After those
 * members come `options` (only when `opt.options` is truthy),
 * `performAction`, `rules` and `conditions`.
 *
 * @param {Object} opt
 * @param {Object} [opt.options]    serialised with JSON.stringify when present
 * @param {*} opt.performAction     emitted via String()
 * @param {*} opt.rules             string-coerced between `[` and `]`
 * @param {*} opt.conditions        serialised with JSON.stringify
 * @returns {string} source text of the form `({ ... })`
 */
function generateModuleBody (opt) {
    var functionDescriptions = {
        setInput: "resets the lexer, sets new input",
        input: "consumes and returns one char from the input",
        unput: "unshifts one char (or a string) into the input",
        more: "When called from action, caches matched text and appends it on next action",
        reject: "When called from action, signals the lexer that this rule fails to match the input, so the next matching rule (regex) should be tested instead.",
        less: "retain first n characters of the match",
        pastInput: "displays already matched input, i.e. for error messages",
        upcomingInput: "displays upcoming input, i.e. for error messages",
        showPosition: "displays the character position where the lexing error occurred, i.e. for error messages",
        test_match: "test the lexed token: return FALSE when not a match, otherwise return token",
        next: "return next match in input",
        lex: "return next match that has a token",
        begin: "activates a new lexer condition state (pushes the new lexer condition state onto the condition stack)",
        popState: "pop the previously active lexer condition state off the condition stack",
        _currentRules: "produce the lexer rule set which is active for the currently active lexer condition state",
        topState: "return the currently active lexer condition state; when an index argument is provided it produces the N-th previous condition state, if available",
        pushState: "alias for begin(condition)",
        stateStackSize: "return the number of states currently on the stack"
    };
    var hasOwn = Object.prototype.hasOwnProperty;
    var proto = RegExpLexer.prototype;
    var out = "({\n";
    var p = [];
    var descr;
    var member;

    for (var k in proto) {
        if (hasOwn.call(proto, k) && k.indexOf("generate") === -1) {
            // copy the function description as a comment before the implementation; supports multi-line descriptions
            descr = "\n";
            // own-property check: a member named e.g. `toString` must not pick up
            // the inherited Object.prototype function as its "description"
            if (hasOwn.call(functionDescriptions, k)) {
                descr += "// " + functionDescriptions[k].replace(/\n/g, "\n\/\/ ") + "\n";
            }
            // String() instead of .toString() so null/undefined members are
            // emitted as `null`/`undefined` rather than throwing
            member = String(proto[k]);
            p.push(descr + k + ":" + (member || '""'));
        }
    }
    out += p.join(",\n");

    if (opt.options) {
        out += ",\noptions: " + JSON.stringify(opt.options);
    }

    out += ",\nperformAction: " + String(opt.performAction);
    out += ",\nrules: [" + opt.rules + "]";
    out += ",\nconditions: " + JSON.stringify(opt.conditions);
    out += "\n})";

    return out;
}
553 | 
/**
 * Produce a plain-script lexer module: a version banner followed by
 * `var <moduleName> = (function(){ var lexer = <body>; ...; return lexer; })();`.
 *
 * @param {Object} [opt] generator options (defaults to an empty object)
 * @param {string} [opt.moduleName="lexer"] name of the outer variable
 * @param {string} [opt.moduleInclude] extra source inserted inside the
 *        wrapper, after the lexer object and before `return lexer`
 * @returns {string} the generated source text
 */
function generateModule(opt) {
    var settings = opt || {};

    var banner = "/* generated by jison-lex " + version + " */";
    var name = settings.moduleName || "lexer";
    var opening = "\nvar " + name + " = (function(){\nvar lexer = ";
    var body = generateModuleBody(settings);
    var included = settings.moduleInclude ? ";\n" + settings.moduleInclude : "";

    return banner + opening + body + included + ";\nreturn lexer;\n})();";
}
571 | 
/**
 * Produce an AMD lexer module: a version banner followed by a
 * `define([], function(){ var lexer = <body>; ...; return lexer; });` call.
 *
 * @param {Object} [opt] generator options; defaults to an empty object so a
 *        call without arguments behaves like the sibling generators instead
 *        of throwing
 * @param {string} [opt.moduleInclude] extra source inserted inside the
 *        factory, after the lexer object and before `return lexer`
 * @returns {string} the generated source text
 */
function generateAMDModule(opt) {
    opt = opt || {};

    var out = "/* generated by jison-lex " + version + " */";

    out += "define([], function(){\nvar lexer = "
          + generateModuleBody(opt);

    if (opt.moduleInclude) {
        out += ";\n" + opt.moduleInclude;
    }

    out += ";\nreturn lexer;"
         + "\n});";

    return out;
}
587 | 
/**
 * Produce a CommonJS lexer module: the output of generateModule followed by
 * `exports.lexer` (the lexer object) and `exports.lex` (a function that
 * forwards its arguments to the lexer's `lex` method).
 *
 * @param {Object} [opt] generator options (defaults to an empty object)
 * @param {string} [opt.moduleName="lexer"] name of the variable that
 *        generateModule declares for the lexer
 * @returns {string} the generated source text
 */
function generateCommonJSModule(opt) {
    opt = opt || {};

    var moduleName = opt.moduleName || "lexer";
    var out = generateModule(opt);

    out += "\nexports.lexer = " + moduleName;
    // Use moduleName as the `this` value too: the `lexer` variable emitted by
    // generateModule lives inside its function wrapper, so a hard-coded
    // `lexer` is undefined here whenever moduleName is something else.
    out += ";\nexports.lex = function () { return " + moduleName + ".lex.apply(" + moduleName + ", arguments); };";
    return out;
}
599 | 
600 | RegExpLexer.generate = generate; // expose `generate` as a static member of RegExpLexer
601 | 
602 | module.exports = RegExpLexer; // the module's export is RegExpLexer itself
603 | 
604 | 


--------------------------------------------------------------------------------
/tests/all-tests.js:
--------------------------------------------------------------------------------
1 | exports.testRegExpLexer = require("./regexplexer"); // register the RegExpLexer test module under one export
2 | 
3 | if (require.main === module) // only when this file is the entry script, not when it is required
4 |     process.exit(require("test").run(exports)); // run the exported tests; run()'s return value becomes the exit code
5 | 


--------------------------------------------------------------------------------
/tests/regexplexer.js:
--------------------------------------------------------------------------------
   1 | var RegExpLexer = require("../regexp-lexer"),
   2 |     assert = require("assert");
   3 | 
   4 | exports["test basic matchers"] = function() {
   5 |     var dict = {
   6 |         rules: [
   7 |            ["x", "return 'X';" ],
   8 |            ["y", "return 'Y';" ],
   9 |            ["$", "return 'EOF';" ]
  10 |        ]
  11 |     };
  12 | 
  13 |     var input = "xxyx";
  14 | 
  15 |     var lexer = new RegExpLexer(dict, input);
  16 |     assert.equal(lexer.lex(), "X");
  17 |     assert.equal(lexer.lex(), "X");
  18 |     assert.equal(lexer.lex(), "Y");
  19 |     assert.equal(lexer.lex(), "X");
  20 |     assert.equal(lexer.lex(), "EOF");
  21 | };
  22 | 
  23 | exports["test set yy"] = function() {
  24 |     var dict = {
  25 |         rules: [
  26 |            ["x", "return yy.x;" ],
  27 |            ["y", "return 'Y';" ],
  28 |            ["$", "return 'EOF';" ]
  29 |        ]
  30 |     };
  31 | 
  32 |     var input = "xxyx";
  33 | 
  34 |     var lexer = new RegExpLexer(dict);
  35 |     lexer.setInput(input, { x: 'EX' });
  36 |     assert.equal(lexer.lex(), "EX");
  37 | };
  38 | 
  39 | exports["test set input after"] = function() {
  40 |     var dict = {
  41 |         rules: [
  42 |            ["x", "return 'X';" ],
  43 |            ["y", "return 'Y';" ],
  44 |            ["$", "return 'EOF';" ]
  45 |        ]
  46 |     };
  47 | 
  48 |     var input = "xxyx";
  49 | 
  50 |     var lexer = new RegExpLexer(dict);
  51 |     lexer.setInput(input);
  52 | 
  53 |     assert.equal(lexer.lex(), "X");
  54 |     assert.equal(lexer.lex(), "X");
  55 |     assert.equal(lexer.lex(), "Y");
  56 |     assert.equal(lexer.lex(), "X");
  57 |     assert.equal(lexer.lex(), "EOF");
  58 | };
  59 | 
  60 | exports["test unrecognized char"] = function() {
  61 |     var dict = {
  62 |         rules: [
  63 |            ["x", "return 'X';" ],
  64 |            ["y", "return 'Y';" ],
  65 |            ["$", "return 'EOF';" ]
  66 |        ]
  67 |     };
  68 | 
  69 |     var input = "xa";
  70 | 
  71 |     var lexer = new RegExpLexer(dict, input);
  72 |     assert.equal(lexer.lex(), "X");
  73 |     assert.throws(function(){lexer.lex()}, "bad char");
  74 | };
  75 | 
  76 | exports["test macro"] = function() {
  77 |     var dict = {
  78 |         macros: {
  79 |             "digit": "[0-9]"
  80 |         },
  81 |         rules: [
  82 |            ["x", "return 'X';" ],
  83 |            ["y", "return 'Y';" ],
  84 |            ["{digit}+", "return 'NAT';" ],
  85 |            ["$", "return 'EOF';" ]
  86 |        ]
  87 |     };
  88 | 
  89 |     var input = "x12234y42";
  90 | 
  91 |     var lexer = new RegExpLexer(dict, input);
  92 |     assert.equal(lexer.lex(), "X");
  93 |     assert.equal(lexer.lex(), "NAT");
  94 |     assert.equal(lexer.lex(), "Y");
  95 |     assert.equal(lexer.lex(), "NAT");
  96 |     assert.equal(lexer.lex(), "EOF");
  97 | };
  98 | 
  99 | exports["test macro precedence"] = function() {
 100 |     var dict = {
 101 |         macros: {
 102 |             "hex": "[0-9]|[a-f]"
 103 |         },
 104 |         rules: [
 105 |            ["-", "return '-';" ],
 106 |            ["{hex}+", "return 'HEX';" ],
 107 |            ["$", "return 'EOF';" ]
 108 |        ]
 109 |     };
 110 | 
 111 |     var input = "129-abfe-42dc-ea12";
 112 | 
 113 |     var lexer = new RegExpLexer(dict, input);
 114 |     assert.equal(lexer.lex(), "HEX");
 115 |     assert.equal(lexer.lex(), "-");
 116 |     assert.equal(lexer.lex(), "HEX");
 117 |     assert.equal(lexer.lex(), "-");
 118 |     assert.equal(lexer.lex(), "HEX");
 119 |     assert.equal(lexer.lex(), "-");
 120 |     assert.equal(lexer.lex(), "HEX");
 121 |     assert.equal(lexer.lex(), "EOF");
 122 | };
 123 | 
 124 | exports["test nested macros"] = function () {
 125 |     var dict = {
 126 |         macros: {
 127 |             "digit": "[0-9]",
 128 |             "2digit": "{digit}{digit}",
 129 |             "3digit": "{2digit}{digit}"
 130 |         },
 131 |         rules: [
 132 |            ["x", "return 'X';" ],
 133 |            ["y", "return 'Y';" ],
 134 |            ["{3digit}", "return 'NNN';" ],
 135 |            ["{2digit}", "return 'NN';" ],
 136 |            ["{digit}", "return 'N';" ],
 137 |            ["$", "return 'EOF';" ]
 138 |        ]
 139 |     };
 140 | 
 141 |     var input = "x1y42y123";
 142 | 
 143 |     var lexer = new RegExpLexer(dict, input);
 144 |     assert.equal(lexer.lex(), "X");
 145 |     assert.equal(lexer.lex(), "N");
 146 |     assert.equal(lexer.lex(), "Y");
 147 |     assert.equal(lexer.lex(), "NN");
 148 |     assert.equal(lexer.lex(), "Y");
 149 |     assert.equal(lexer.lex(), "NNN");
 150 |     assert.equal(lexer.lex(), "EOF");
 151 | };
 152 | 
 153 | exports["test nested macro precedence"] = function() {
 154 |     var dict = {
 155 |         macros: {
 156 |             "hex": "[0-9]|[a-f]",
 157 |             "col": "#{hex}+"
 158 |         },
 159 |         rules: [
 160 |            ["-", "return '-';" ],
 161 |            ["{col}", "return 'HEX';" ],
 162 |            ["$", "return 'EOF';" ]
 163 |        ]
 164 |     };
 165 | 
 166 |     var input = "#129-#abfe-#42dc-#ea12";
 167 | 
 168 |     var lexer = new RegExpLexer(dict, input);
 169 |     assert.equal(lexer.lex(), "HEX");
 170 |     assert.equal(lexer.lex(), "-");
 171 |     assert.equal(lexer.lex(), "HEX");
 172 |     assert.equal(lexer.lex(), "-");
 173 |     assert.equal(lexer.lex(), "HEX");
 174 |     assert.equal(lexer.lex(), "-");
 175 |     assert.equal(lexer.lex(), "HEX");
 176 |     assert.equal(lexer.lex(), "EOF");
 177 | };
 178 | 
 179 | exports["test action include"] = function() {
 180 |     var dict = {
 181 |         rules: [
 182 |            ["x", "return included ? 'Y' : 'N';" ],
 183 |            ["$", "return 'EOF';" ]
 184 |        ],
 185 |        actionInclude: "var included = true;"
 186 |     };
 187 | 
 188 |     var input = "x";
 189 | 
 190 |     var lexer = new RegExpLexer(dict, input);
 191 |     assert.equal(lexer.lex(), "Y");
 192 |     assert.equal(lexer.lex(), "EOF");
 193 | };
 194 | 
 195 | exports["test ignored"] = function() {
 196 |     var dict = {
 197 |         rules: [
 198 |            ["x", "return 'X';" ],
 199 |            ["y", "return 'Y';" ],
 200 |            ["\\s+", "/* skip whitespace */" ],
 201 |            ["$", "return 'EOF';" ]
 202 |        ]
 203 |     };
 204 | 
 205 |     var input = "x x   y x";
 206 | 
 207 |     var lexer = new RegExpLexer(dict, input);
 208 |     assert.equal(lexer.lex(), "X");
 209 |     assert.equal(lexer.lex(), "X");
 210 |     assert.equal(lexer.lex(), "Y");
 211 |     assert.equal(lexer.lex(), "X");
 212 |     assert.equal(lexer.lex(), "EOF");
 213 | };
 214 | 
 215 | exports["test disambiguate"] = function() {
 216 |     var dict = {
 217 |         rules: [
 218 |            ["for\\b", "return 'FOR';" ],
 219 |            ["if\\b", "return 'IF';" ],
 220 |            ["[a-z]+", "return 'IDENTIFIER';" ],
 221 |            ["\\s+", "/* skip whitespace */" ],
 222 |            ["$", "return 'EOF';" ]
 223 |        ]
 224 |     };
 225 | 
 226 |     var input = "if forever for for";
 227 | 
 228 |     var lexer = new RegExpLexer(dict, input);
 229 |     assert.equal(lexer.lex(), "IF");
 230 |     assert.equal(lexer.lex(), "IDENTIFIER");
 231 |     assert.equal(lexer.lex(), "FOR");
 232 |     assert.equal(lexer.lex(), "FOR");
 233 |     assert.equal(lexer.lex(), "EOF");
 234 | };
 235 | 
 236 | exports["test yytext overwrite"] = function() {
 237 |     var dict = {
 238 |         rules: [
 239 |            ["x", "yytext = 'hi der'; return 'X';" ]
 240 |        ]
 241 |     };
 242 | 
 243 |     var input = "x";
 244 | 
 245 |     var lexer = new RegExpLexer(dict, input);
 246 |     lexer.lex();
 247 |     assert.equal(lexer.yytext, "hi der");
 248 | };
 249 | 
 250 | exports["test yylineno"] = function() {
 251 |     var dict = {
 252 |         rules: [
 253 |            ["\\s+", "/* skip whitespace */" ],
 254 |            ["x", "return 'x';" ],
 255 |            ["y", "return 'y';" ]
 256 |        ]
 257 |     };
 258 | 
 259 |     var input = "x\nxy\n\n\nx";
 260 | 
 261 |     var lexer = new RegExpLexer(dict, input);
 262 |     assert.equal(lexer.yylineno, 0);
 263 |     assert.equal(lexer.lex(), "x");
 264 |     assert.equal(lexer.lex(), "x");
 265 |     assert.equal(lexer.yylineno, 1);
 266 |     assert.equal(lexer.lex(), "y");
 267 |     assert.equal(lexer.yylineno, 1);
 268 |     assert.equal(lexer.lex(), "x");
 269 |     assert.equal(lexer.yylineno, 4);
 270 | };
 271 | 
 272 | exports["test yylloc"] = function() {
 273 |     var dict = {
 274 |         rules: [
 275 |            ["\\s+", "/* skip whitespace */" ],
 276 |            ["x", "return 'x';" ],
 277 |            ["y", "return 'y';" ]
 278 |        ]
 279 |     };
 280 | 
 281 |     var input = "x\nxy\n\n\nx";
 282 | 
 283 |     var lexer = new RegExpLexer(dict, input);
 284 |     assert.equal(lexer.lex(), "x");
 285 |     assert.equal(lexer.yylloc.first_column, 0);
 286 |     assert.equal(lexer.yylloc.last_column, 1);
 287 |     assert.equal(lexer.lex(), "x");
 288 |     assert.equal(lexer.yylloc.first_line, 2);
 289 |     assert.equal(lexer.yylloc.last_line, 2);
 290 |     assert.equal(lexer.yylloc.first_column, 0);
 291 |     assert.equal(lexer.yylloc.last_column, 1);
 292 |     assert.equal(lexer.lex(), "y");
 293 |     assert.equal(lexer.yylloc.first_line, 2);
 294 |     assert.equal(lexer.yylloc.last_line, 2);
 295 |     assert.equal(lexer.yylloc.first_column, 1);
 296 |     assert.equal(lexer.yylloc.last_column, 2);
 297 |     assert.equal(lexer.lex(), "x");
 298 |     assert.equal(lexer.yylloc.first_line, 5);
 299 |     assert.equal(lexer.yylloc.last_line, 5);
 300 |     assert.equal(lexer.yylloc.first_column, 0);
 301 |     assert.equal(lexer.yylloc.last_column, 1);
 302 | };
 303 | 
 304 | exports["test more()"] = function() {
 305 |     var dict = {
 306 |         rules: [
 307 |            ["x", "return 'X';" ],
 308 |            ['"[^"]*', function(){
 309 |                if(yytext.charAt(yyleng-1) == '\\') {
 310 |                    this.more();
 311 |                } else {
 312 |                    yytext += this.input(); // swallow end quote
 313 |                    return "STRING";
 314 |                }
 315 |             } ],
 316 |            ["$", "return 'EOF';" ]
 317 |        ]
 318 |     };
 319 | 
 320 |     var input = 'x"fgjdrtj\\"sdfsdf"x';
 321 | 
 322 |     var lexer = new RegExpLexer(dict, input);
 323 |     assert.equal(lexer.lex(), "X");
 324 |     assert.equal(lexer.lex(), "STRING");
 325 |     assert.equal(lexer.lex(), "X");
 326 |     assert.equal(lexer.lex(), "EOF");
 327 | };
 328 | 
 329 | exports["test defined token returns"] = function() {
 330 |     var tokens = {"2":"X", "3":"Y", "4":"EOF"};
 331 |     var dict = {
 332 |         rules: [
 333 |            ["x", "return 'X';" ],
 334 |            ["y", "return 'Y';" ],
 335 |            ["$", "return 'EOF';" ]
 336 |        ]
 337 |     };
 338 | 
 339 |     var input = "xxyx";
 340 | 
 341 |     var lexer = new RegExpLexer(dict, input, tokens);
 342 | 
 343 |     assert.equal(lexer.lex(), 2);
 344 |     assert.equal(lexer.lex(), 2);
 345 |     assert.equal(lexer.lex(), 3);
 346 |     assert.equal(lexer.lex(), 2);
 347 |     assert.equal(lexer.lex(), 4);
 348 | };
 349 | 
 350 | exports["test module generator from constructor"] = function() {
 351 |     var dict = {
 352 |         rules: [
 353 |            ["x", "return 'X';" ],
 354 |            ["y", "return 'Y';" ],
 355 |            ["$", "return 'EOF';" ]
 356 |        ]
 357 |     };
 358 | 
 359 |     var input = "xxyx";
 360 | 
 361 |     var lexerSource = RegExpLexer.generate(dict);
 362 |     eval(lexerSource);
 363 |     lexer.setInput(input);
 364 | 
 365 |     assert.equal(lexer.lex(), "X");
 366 |     assert.equal(lexer.lex(), "X");
 367 |     assert.equal(lexer.lex(), "Y");
 368 |     assert.equal(lexer.lex(), "X");
 369 |     assert.equal(lexer.lex(), "EOF");
 370 | };
 371 | 
 372 | exports["test module generator"] = function() {
 373 |     var dict = {
 374 |         rules: [
 375 |            ["x", "return 'X';" ],
 376 |            ["y", "return 'Y';" ],
 377 |            ["$", "return 'EOF';" ]
 378 |        ]
 379 |     };
 380 | 
 381 |     var input = "xxyx";
 382 | 
 383 |     var lexer_ = new RegExpLexer(dict);
 384 |     var lexerSource = lexer_.generateModule();
 385 |     eval(lexerSource);
 386 |     lexer.setInput(input);
 387 | 
 388 |     assert.equal(lexer.lex(), "X");
 389 |     assert.equal(lexer.lex(), "X");
 390 |     assert.equal(lexer.lex(), "Y");
 391 |     assert.equal(lexer.lex(), "X");
 392 |     assert.equal(lexer.lex(), "EOF");
 393 | };
 394 | 
 395 | exports["test generator with more complex lexer"] = function() {
 396 |     var dict = {
 397 |         rules: [
 398 |            ["x", "return 'X';" ],
 399 |            ['"[^"]*', function(){
 400 |                if(yytext.charAt(yyleng-1) == '\\') {
 401 |                    this.more();
 402 |                } else {
 403 |                    yytext += this.input(); // swallow end quote
 404 |                    return "STRING";
 405 |                }
 406 |             } ],
 407 |            ["$", "return 'EOF';" ]
 408 |        ]
 409 |     };
 410 | 
 411 |     var input = 'x"fgjdrtj\\"sdfsdf"x';
 412 | 
 413 |     var lexer_ = new RegExpLexer(dict);
 414 |     var lexerSource = lexer_.generateModule();
 415 |     eval(lexerSource);
 416 |     lexer.setInput(input);
 417 | 
 418 |     assert.equal(lexer.lex(), "X");
 419 |     assert.equal(lexer.lex(), "STRING");
 420 |     assert.equal(lexer.lex(), "X");
 421 |     assert.equal(lexer.lex(), "EOF");
 422 | };
 423 | 
 424 | exports["test commonjs module generator"] = function() {
 425 |     var dict = {
 426 |         rules: [
 427 |            ["x", "return 'X';" ],
 428 |            ["y", "return 'Y';" ],
 429 |            ["$", "return 'EOF';" ]
 430 |        ]
 431 |     };
 432 | 
 433 |     var input = "xxyx";
 434 | 
 435 |     var lexer_ = new RegExpLexer(dict);
 436 |     var lexerSource = lexer_.generateCommonJSModule();
 437 |     var exports = {};
 438 |     eval(lexerSource);
 439 |     exports.lexer.setInput(input);
 440 | 
 441 |     assert.equal(exports.lex(), "X");
 442 |     assert.equal(exports.lex(), "X");
 443 |     assert.equal(exports.lex(), "Y");
 444 |     assert.equal(exports.lex(), "X");
 445 |     assert.equal(exports.lex(), "EOF");
 446 | };
 447 | 
 448 | exports["test amd module generator"] = function() {
 449 |     var dict = {
 450 |         rules: [
 451 |            ["x", "return 'X';" ],
 452 |            ["y", "return 'Y';" ],
 453 |            ["$", "return 'EOF';" ]
 454 |        ]
 455 |     };
 456 | 
 457 |     var input = "xxyx";
 458 | 
 459 |     var lexer_ = new RegExpLexer(dict);
 460 |     var lexerSource = lexer_.generateAMDModule();
 461 | 
 462 |     var lexer;
 463 |     var define = function (_, fn) {
 464 |       lexer = fn();
 465 |     };
 466 | 
 467 |     eval(lexerSource);
 468 |     lexer.setInput(input);
 469 | 
 470 |     assert.equal(lexer.lex(), "X");
 471 |     assert.equal(lexer.lex(), "X");
 472 |     assert.equal(lexer.lex(), "Y");
 473 |     assert.equal(lexer.lex(), "X");
 474 |     assert.equal(lexer.lex(), "EOF");
 475 | };
 476 | 
 477 | exports["test DJ lexer"] = function() {
 478 |     var dict = {
 479 |     "lex": {
 480 |         "macros": {
 481 |             "digit": "[0-9]",
 482 |             "id": "[a-zA-Z][a-zA-Z0-9]*"
 483 |         },
 484 | 
 485 |         "rules": [
 486 |             ["\\/\\/.*",       "/* ignore comment */"],
 487 |             ["main\\b",     "return 'MAIN';"],
 488 |             ["class\\b",    "return 'CLASS';"],
 489 |             ["extends\\b",  "return 'EXTENDS';"],
 490 |             ["nat\\b",      "return 'NATTYPE';"],
 491 |             ["if\\b",       "return 'IF';"],
 492 |             ["else\\b",     "return 'ELSE';"],
 493 |             ["for\\b",      "return 'FOR';"],
 494 |             ["printNat\\b", "return 'PRINTNAT';"],
 495 |             ["readNat\\b",  "return 'READNAT';"],
 496 |             ["this\\b",     "return 'THIS';"],
 497 |             ["new\\b",      "return 'NEW';"],
 498 |             ["var\\b",      "return 'VAR';"],
 499 |             ["null\\b",     "return 'NUL';"],
 500 |             ["{digit}+",   "return 'NATLITERAL';"],
 501 |             ["{id}",       "return 'ID';"],
 502 |             ["==",         "return 'EQUALITY';"],
 503 |             ["=",          "return 'ASSIGN';"],
 504 |             ["\\+",        "return 'PLUS';"],
 505 |             ["-",          "return 'MINUS';"],
 506 |             ["\\*",        "return 'TIMES';"],
 507 |             [">",          "return 'GREATER';"],
 508 |             ["\\|\\|",     "return 'OR';"],
 509 |             ["!",          "return 'NOT';"],
 510 |             ["\\.",        "return 'DOT';"],
 511 |             ["\\{",        "return 'LBRACE';"],
 512 |             ["\\}",        "return 'RBRACE';"],
 513 |             ["\\(",        "return 'LPAREN';"],
 514 |             ["\\)",        "return 'RPAREN';"],
 515 |             [";",          "return 'SEMICOLON';"],
 516 |             ["\\s+",       "/* skip whitespace */"],
 517 |             [".",          "print('Illegal character');throw 'Illegal character';"],
 518 |             ["$",          "return 'ENDOFFILE';"]
 519 |         ]
 520 |     }
 521 | };
 522 | 
 523 |     var input = "class Node extends Object { \
 524 |                       var nat value    var nat value;\
 525 |                       var Node next;\
 526 |                       var nat index;\
 527 |                     }\
 528 | \
 529 |                     class List extends Object {\
 530 |                       var Node start;\
 531 | \
 532 |                       Node prepend(Node startNode) {\
 533 |                         startNode.next = start;\
 534 |                         start = startNode;\
 535 |                       }\
 536 | \
 537 |                       nat find(nat index) {\
 538 |                         var nat value;\
 539 |                         var Node node;\
 540 | \
 541 |                         for(node = start;!(node == null);node = node.next){\
 542 |                           if(node.index == index){\
 543 |                             value = node.value;\
 544 |                           } else { 0; };\
 545 |                         };\
 546 | \
 547 |                         value;\
 548 |                       }\
 549 |                     }\
 550 | \
 551 |                     main {\
 552 |                       var nat index;\
 553 |                       var nat value;\
 554 |                       var List list;\
 555 |                       var Node startNode;\
 556 | \
 557 |                       index = readNat();\
 558 |                       list = new List;\
 559 | \
 560 |                       for(0;!(index==0);0){\
 561 |                         value = readNat();\
 562 |                         startNode = new Node;\
 563 |                         startNode.index = index;\
 564 |                         startNode.value = value;\
 565 |                         list.prepend(startNode);\
 566 |                         index = readNat();\
 567 |                       };\
 568 | \
 569 |                       index = readNat();\
 570 | \
 571 |                       for(0;!(index==0);0){\
 572 |                         printNat(list.find(index));\
 573 |                         index = readNat();\
 574 |                       };\
 575 |                     }";
 576 | 
 577 |     var lexer = new RegExpLexer(dict.lex);
 578 |     lexer.setInput(input);
 579 |     var tok;
 580 |     while (tok = lexer.lex(), tok!==1) {
 581 |         assert.equal(typeof tok, "string");
 582 |     }
 583 | };
 584 | 
 585 | exports["test instantiation from string"] = function() {
 586 |     var dict = "%%\n'x' {return 'X';}\n'y' {return 'Y';}\n<<EOF>> {return 'EOF';}";
 587 | 
 588 |     var input = "x";
 589 | 
 590 |     var lexer = new RegExpLexer(dict);
 591 |     lexer.setInput(input);
 592 | 
 593 |     assert.equal(lexer.lex(), "X");
 594 |     assert.equal(lexer.lex(), "EOF");
 595 | };
 596 | 
 597 | exports["test inclusive start conditions"] = function() {
 598 |     var dict = {
 599 |         startConditions: {
 600 |             "TEST": 0,
 601 |         },
 602 |         rules: [
 603 |             ["enter-test", "this.begin('TEST');" ],
 604 |             [["TEST"], "x", "return 'T';" ],
 605 |             [["TEST"], "y", "this.begin('INITIAL'); return 'TY';" ],
 606 |             ["x", "return 'X';" ],
 607 |             ["y", "return 'Y';" ],
 608 |             ["$", "return 'EOF';" ]
 609 |         ]
 610 |     };
 611 |     var input = "xenter-testxyy";
 612 | 
 613 |     var lexer = new RegExpLexer(dict);
 614 |     lexer.setInput(input);
 615 | 
 616 |     assert.equal(lexer.lex(), "X");
 617 |     assert.equal(lexer.lex(), "T");
 618 |     assert.equal(lexer.lex(), "TY");
 619 |     assert.equal(lexer.lex(), "Y");
 620 |     assert.equal(lexer.lex(), "EOF");
 621 | };
 622 | 
 623 | exports["test exclusive start conditions"] = function() {
 624 |     var dict = {
 625 |         startConditions: {
 626 |             "EAT": 1,
 627 |         },
 628 |         rules: [
 629 |             ["\\/\\/", "this.begin('EAT');" ],
 630 |             [["EAT"], ".", "" ],
 631 |             [["EAT"], "\\n", "this.begin('INITIAL');" ],
 632 |             ["x", "return 'X';" ],
 633 |             ["y", "return 'Y';" ],
 634 |             ["$", "return 'EOF';" ]
 635 |         ]
 636 |     };
 637 |     var input = "xy//yxteadh//ste\ny";
 638 | 
 639 |     var lexer = new RegExpLexer(dict);
 640 |     lexer.setInput(input);
 641 | 
 642 |     assert.equal(lexer.lex(), "X");
 643 |     assert.equal(lexer.lex(), "Y");
 644 |     assert.equal(lexer.lex(), "Y");
 645 |     assert.equal(lexer.lex(), "EOF");
 646 | };
 647 | 
 648 | exports["test pop start condition stack"] = function() {
 649 |     var dict = {
 650 |         startConditions: {
 651 |             "EAT": 1,
 652 |         },
 653 |         rules: [
 654 |             ["\\/\\/", "this.begin('EAT');" ],
 655 |             [["EAT"], ".", "" ],
 656 |             [["EAT"], "\\n", "this.popState();" ],
 657 |             ["x", "return 'X';" ],
 658 |             ["y", "return 'Y';" ],
 659 |             ["$", "return 'EOF';" ]
 660 |         ]
 661 |     };
 662 |     var input = "xy//yxteadh//ste\ny";
 663 | 
 664 |     var lexer = new RegExpLexer(dict);
 665 |     lexer.setInput(input);
 666 | 
 667 |     assert.equal(lexer.lex(), "X");
 668 |     assert.equal(lexer.lex(), "Y");
 669 |     assert.equal(lexer.lex(), "Y");
 670 |     assert.equal(lexer.lex(), "EOF");
 671 | };
 672 | 
 673 | 
 674 | exports["test star start condition"] = function() {
 675 |     var dict = {
 676 |         startConditions: {
 677 |             "EAT": 1,
 678 |         },
 679 |         rules: [
 680 |             ["\\/\\/", "this.begin('EAT');" ],
 681 |             [["EAT"], ".", "" ],
 682 |             ["x", "return 'X';" ],
 683 |             ["y", "return 'Y';" ],
 684 |             [["*"],"$", "return 'EOF';" ]
 685 |         ]
 686 |     };
 687 |     var input = "xy//yxteadh//stey";
 688 | 
 689 |     var lexer = new RegExpLexer(dict);
 690 |     lexer.setInput(input);
 691 | 
 692 |     assert.equal(lexer.lex(), "X");
 693 |     assert.equal(lexer.lex(), "Y");
 694 |     assert.equal(lexer.lex(), "EOF");
 695 | };
 696 | 
 697 | exports["test start condition constants"] = function() {
 698 |     var dict = {
 699 |         startConditions: {
 700 |             "EAT": 1,
 701 |         },
 702 |         rules: [
 703 |             ["\\/\\/", "this.begin('EAT');" ],
 704 |             [["EAT"], ".", "if (YYSTATE==='EAT') return 'E';" ],
 705 |             ["x", "if (YY_START==='INITIAL') return 'X';" ],
 706 |             ["y", "return 'Y';" ],
 707 |             [["*"],"$", "return 'EOF';" ]
 708 |         ]
 709 |     };
 710 |     var input = "xy//y";
 711 | 
 712 |     var lexer = new RegExpLexer(dict);
 713 |     lexer.setInput(input);
 714 | 
 715 |     assert.equal(lexer.lex(), "X");
 716 |     assert.equal(lexer.lex(), "Y");
 717 |     assert.equal(lexer.lex(), "E");
 718 |     assert.equal(lexer.lex(), "EOF");
 719 | };
 720 | 
 721 | exports["test unicode encoding"] = function() {
 722 |     var dict = {
 723 |         rules: [
 724 |             ["\\u2713", "return 'CHECK';" ],
 725 |             ["\\u03c0", "return 'PI';" ],
 726 |             ["y", "return 'Y';" ]
 727 |         ]
 728 |     };
 729 |     var input = "\u2713\u03c0y";
 730 | 
 731 |     var lexer = new RegExpLexer(dict);
 732 |     lexer.setInput(input);
 733 | 
 734 |     assert.equal(lexer.lex(), "CHECK");
 735 |     assert.equal(lexer.lex(), "PI");
 736 |     assert.equal(lexer.lex(), "Y");
 737 | };
 738 | 
 739 | exports["test unicode"] = function() {
 740 |     var dict = {
 741 |         rules: [
 742 |             ["π", "return 'PI';" ],
 743 |             ["y", "return 'Y';" ]
 744 |         ]
 745 |     };
 746 |     var input = "πy";
 747 | 
 748 |     var lexer = new RegExpLexer(dict);
 749 |     lexer.setInput(input);
 750 | 
 751 |     assert.equal(lexer.lex(), "PI");
 752 |     assert.equal(lexer.lex(), "Y");
 753 | };
 754 | 
 755 | exports["test longest match returns"] = function() {
 756 |     var dict = {
 757 |         rules: [
 758 |             [".", "return 'DOT';" ],
 759 |             ["cat", "return 'CAT';" ]
 760 |         ],
 761 |         options: {flex: true}
 762 |     };
 763 |     var input = "cat!";
 764 | 
 765 |     var lexer = new RegExpLexer(dict);
 766 |     lexer.setInput(input);
 767 | 
 768 |     assert.equal(lexer.lex(), "CAT");
 769 |     assert.equal(lexer.lex(), "DOT");
 770 | };
 771 | 
 772 | exports["test case insensitivity"] = function() {
 773 |     var dict = {
 774 |         rules: [
 775 |             ["cat", "return 'CAT';" ]
 776 |         ],
 777 |         options: {'case-insensitive': true}
 778 |     };
 779 |     var input = "Cat";
 780 | 
 781 |     var lexer = new RegExpLexer(dict);
 782 |     lexer.setInput(input);
 783 | 
 784 |     assert.equal(lexer.lex(), "CAT");
 785 | };
 786 | 
 787 | exports["test less"] = function() {
 788 |     var dict = {
 789 |         rules: [
 790 |             ["cat", "this.less(2); return 'CAT';" ],
 791 |             ["t", "return 'T';" ]
 792 |         ],
 793 |     };
 794 |     var input = "cat";
 795 | 
 796 |     var lexer = new RegExpLexer(dict);
 797 |     lexer.setInput(input);
 798 | 
 799 |     assert.equal(lexer.lex(), "CAT");
 800 |     assert.equal(lexer.lex(), "T");
 801 | };
 802 | 
 803 | exports["test EOF unput"] = function() {
 804 |     var dict = {
 805 |         startConditions: {
 806 |             "UN": 1,
 807 |         },
 808 |         rules: [
 809 |             ["U", "this.begin('UN');return 'U';" ],
 810 |             [["UN"],"$", "this.unput('X')" ],
 811 |             [["UN"],"X", "this.popState();return 'X';" ],
 812 |             ["$", "return 'EOF'" ]
 813 |         ]
 814 |     };
 815 |     var input = "U";
 816 | 
 817 |     var lexer = new RegExpLexer(dict);
 818 |     lexer.setInput(input);
 819 | 
 820 |     assert.equal(lexer.lex(), "U");
 821 |     assert.equal(lexer.lex(), "X");
 822 |     assert.equal(lexer.lex(), "EOF");
 823 | };
 824 | 
 825 | exports["test flex mode default rule"] = function() {
 826 |     var dict = {
 827 |         rules: [
 828 |             ["x", "return 'X';" ]
 829 |         ],
 830 |         options: {flex: true}
 831 |     };
 832 |     var input = "xyx";
 833 | 
 834 |     var lexer = new RegExpLexer(dict);
 835 |     lexer.setInput(input);
 836 | 
 837 |     assert.equal(lexer.lex(), "X");
 838 |     assert.equal(lexer.lex(), "X");
 839 | };
 840 | 
 841 | exports["test pipe precedence"] = function() {
 842 |     var dict = {
 843 |         rules: [
 844 |             ["x|y", "return 'X_Y';" ],
 845 |             [".",   "return 'N';"]
 846 |         ]
 847 |     };
 848 |     var input = "xny";
 849 | 
 850 |     var lexer = new RegExpLexer(dict);
 851 |     lexer.setInput(input);
 852 | 
 853 |     assert.equal(lexer.lex(), "X_Y");
 854 |     assert.equal(lexer.lex(), "N");
 855 |     assert.equal(lexer.lex(), "X_Y");
 856 | };
 857 | 
 858 | exports["test ranges"] = function() {
 859 |     var dict = {
 860 |         rules: [
 861 |             ["x+", "return 'X';" ],
 862 |             [".",   "return 'N';"]
 863 |         ],
 864 |         options: {ranges: true}
 865 |     };
 866 |     var input = "xxxyy";
 867 | 
 868 |     var lexer = new RegExpLexer(dict);
 869 |     lexer.setInput(input);
 870 | 
 871 |     assert.equal(lexer.lex(), "X");
 872 |     assert.deepEqual(lexer.yylloc.range, [0, 3]);
 873 | };
 874 | 
 875 | exports["test unput location"] = function() {
 876 |     var dict = {
 877 |         rules: [
 878 |             ["x+", "return 'X';" ],
 879 |             ["y\\n", "this.unput('\\n'); return 'Y';" ],
 880 |             ["\\ny", "this.unput('y'); return 'BR';" ],
 881 |             ["y", "return 'Y';" ],
 882 |             [".",   "return 'N';"]
 883 |         ],
 884 |         options: {ranges: true}
 885 |     };
 886 |     var input = "xxxy\ny";
 887 | 
 888 |     var lexer = new RegExpLexer(dict);
 889 |     lexer.setInput(input);
 890 |     console.log(lexer.rules);
 891 | 
 892 |     assert.equal(lexer.next(), "X");
 893 |     assert.deepEqual(lexer.yylloc, {first_line: 1,
 894 |                                     first_column: 0,
 895 |                                     last_line: 1,
 896 |                                     last_column: 3,
 897 |                                     range: [0, 3]});
 898 |     assert.equal(lexer.next(), "Y");
 899 |     assert.deepEqual(lexer.yylloc, {first_line: 1,
 900 |                                     first_column: 3,
 901 |                                     last_line: 1,
 902 |                                     last_column: 4,
 903 |                                     range: [3, 4]});
 904 |     assert.equal(lexer.next(), "BR");
 905 |     assert.deepEqual(lexer.yylloc, {first_line: 1,
 906 |                                     first_column: 4,
 907 |                                     last_line: 2,
 908 |                                     last_column: 0,
 909 |                                     range: [4, 5]});
 910 |     assert.equal(lexer.next(), "Y");
 911 |     assert.deepEqual(lexer.yylloc, {first_line: 2,
 912 |                                     first_column: 0,
 913 |                                     last_line: 2,
 914 |                                     last_column: 1,
 915 |                                     range: [5, 6]});
 916 | 
 917 | };
 918 | 
 919 | exports["test unput location again"] = function() {
 920 |     var dict = {
 921 |         rules: [
 922 |             ["x+", "return 'X';" ],
 923 |             ["y\\ny\\n", "this.unput('\\n'); return 'YY';" ],
 924 |             ["\\ny", "this.unput('y'); return 'BR';" ],
 925 |             ["y", "return 'Y';" ],
 926 |             [".",   "return 'N';"]
 927 |         ],
 928 |         options: {ranges: true}
 929 |     };
 930 |     var input = "xxxy\ny\ny";
 931 | 
 932 |     var lexer = new RegExpLexer(dict);
 933 |     lexer.setInput(input);
 934 |     console.log(lexer.rules);
 935 | 
 936 |     assert.equal(lexer.next(), "X");
 937 |     assert.deepEqual(lexer.yylloc, {first_line: 1,
 938 |                                     first_column: 0,
 939 |                                     last_line: 1,
 940 |                                     last_column: 3,
 941 |                                     range: [0, 3]});
 942 |     assert.equal(lexer.next(), "YY");
 943 |     assert.deepEqual(lexer.yylloc, {first_line: 1,
 944 |                                     first_column: 3,
 945 |                                     last_line: 2,
 946 |                                     last_column: 1,
 947 |                                     range: [3, 6]});
 948 |     assert.equal(lexer.next(), "BR");
 949 |     assert.deepEqual(lexer.yylloc, {first_line: 2,
 950 |                                     first_column: 1,
 951 |                                     last_line: 3,
 952 |                                     last_column: 0,
 953 |                                     range: [6, 7]});
 954 |     assert.equal(lexer.next(), "Y");
 955 |     assert.deepEqual(lexer.yylloc, {first_line: 3,
 956 |                                     first_column: 0,
 957 |                                     last_line: 3,
 958 |                                     last_column: 1,
 959 |                                     range: [7, 8]});
 960 | 
 961 | };
 962 | 
 963 | exports["test backtracking lexer reject() method"] = function() {
 964 |     var dict = {
 965 |         rules: [
 966 |             ["[A-Z]+([0-9]+)", "if (this.matches[1].length) this.reject(); else return 'ID';" ],
 967 |             ["[A-Z]+", "return 'WORD';" ],
 968 |             ["[0-9]+", "return 'NUM';" ]
 969 |         ],
 970 |         options: {backtrack_lexer: true}
 971 |     };
 972 |     var input = "A5";
 973 | 
 974 |     var lexer = new RegExpLexer(dict);
 975 |     lexer.setInput(input);
 976 | 
 977 |     assert.equal(lexer.lex(), "WORD");
 978 |     assert.equal(lexer.lex(), "NUM");
 979 | };
 980 | 
 981 | exports["test lexer reject() exception when not in backtracking mode"] = function() {
 982 |     var dict = {
 983 |         rules: [
 984 |             ["[A-Z]+([0-9]+)", "if (this.matches[1].length) this.reject(); else return 'ID';" ],
 985 |             ["[A-Z]+", "return 'WORD';" ],
 986 |             ["[0-9]+", "return 'NUM';" ]
 987 |         ],
 988 |         options: {backtrack_lexer: false}
 989 |     };
 990 |     var input = "A5";
 991 | 
 992 |     var lexer = new RegExpLexer(dict);
 993 |     lexer.setInput(input);
 994 | 
 995 |     assert.throws(function() {
 996 |       lexer.lex();
 997 |     },
 998 |     function(err) {
 999 |       return (err instanceof Error) && /You can only invoke reject/.test(err);
1000 |     });
1001 | };
1002 | 
/**
 * After the "cat4" rule unputs the trailing '4', `yytext` must hold only
 * "cat"; the pushed-back '4' is then lexed as NUMBER, followed by EOF.
 */
exports["test yytext state after unput"] = function() {
    var grammar = {
        rules: [
            ["cat4", "this.unput('4'); return 'CAT';" ],
            ["4", "return 'NUMBER';" ],
            ["$", "return 'EOF';"]
        ]
    };

    var lexer = new RegExpLexer(grammar);
    lexer.setInput("cat4");

    var firstToken = lexer.lex();
    assert.equal(firstToken, "CAT");

    // '4' was unput from the matched "cat4", so only "cat" remains in yytext.
    assert.equal(lexer.yytext, "cat");

    ["NUMBER", "EOF"].forEach(function(expectedToken) {
        assert.equal(lexer.lex(), expectedToken);
    });
};
1022 | 


--------------------------------------------------------------------------------