├── .gitignore ├── README.md ├── cli.js ├── examples └── lex.l ├── package.json ├── regexp-lexer.js └── tests ├── all-tests.js └── regexplexer.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | 3 | # Editor bak files 4 | *~ 5 | *.bak 6 | *.orig 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # jison-lex 2 | A lexical analyzer generator used by [jison](http://jison.org). It takes a lexical grammar definition (either in JSON or Bison's lexical grammar format) and outputs a JavaScript lexer. 3 | 4 | ## install 5 | npm install jison-lex -g 6 | 7 | ## usage 8 | ``` 9 | Usage: jison-lex [file] [options] 10 | 11 | file file containing a lexical grammar 12 | 13 | Options: 14 | -o FILE, --outfile FILE Filename and base module name of the generated parser 15 | -t TYPE, --module-type TYPE The type of module to generate (commonjs, js) 16 | --version print version and exit 17 | ``` 18 | 19 | ## programmatic usage 20 | 21 | ``` 22 | var JisonLex = require('jison-lex'); 23 | 24 | var grammar = { 25 | rules: [ 26 | ["x", "return 'X';" ], 27 | ["y", "return 'Y';" ], 28 | ["$", "return 'EOF';" ] 29 | ] 30 | }; 31 | 32 | // or load from a file 33 | // var grammar = fs.readFileSync('mylexer.l', 'utf8'); 34 | 35 | // generate source 36 | var lexerSource = JisonLex.generate(grammar); 37 | 38 | // or create a lexer in memory 39 | var lexer = new JisonLex(grammar); 40 | lexer.setInput('xyxxy'); 41 | lexer.lex(); 42 | // => 'X' 43 | lexer.lex(); 44 | // => 'Y' 45 | ``` 46 | ## license 47 | MIT 48 | -------------------------------------------------------------------------------- /cli.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var version = require('./package.json').version; 4 | 5 | var path = require('path'); 6 | var fs = 
require('fs'); 7 | var lexParser = require('lex-parser'); 8 | var RegExpLexer = require('./regexp-lexer.js'); 9 | 10 | 11 | var opts = require("nomnom") 12 | .script('jison-lex') 13 | .option('file', { 14 | flag: true, 15 | position: 0, 16 | help: 'file containing a lexical grammar' 17 | }) 18 | .option('outfile', { 19 | abbr: 'o', 20 | metavar: 'FILE', 21 | help: 'Filename and base module name of the generated parser' 22 | }) 23 | .option('module-type', { 24 | abbr: 't', 25 | default: 'commonjs', 26 | metavar: 'TYPE', 27 | help: 'The type of module to generate (commonjs, js)' 28 | }) 29 | .option('version', { 30 | abbr: 'V', 31 | flag: true, 32 | help: 'print version and exit', 33 | callback: function() { 34 | return version; 35 | } 36 | }); 37 | 38 | exports.main = function (opts) { 39 | if (opts.file) { 40 | var raw = fs.readFileSync(path.normalize(opts.file), 'utf8'), 41 | name = path.basename((opts.outfile||opts.file)).replace(/\..*$/g,''); 42 | 43 | fs.writeFileSync(opts.outfile||(name + '.js'), processGrammar(raw, name)); 44 | } else { 45 | readin(function (raw) { 46 | console.log(processGrammar(raw)); 47 | }); 48 | } 49 | }; 50 | 51 | function processGrammar (file, name) { 52 | var grammar; 53 | try { 54 | grammar = lexParser.parse(file); 55 | } catch (e) { 56 | try { 57 | grammar = JSON.parse(file); 58 | } catch (e2) { 59 | throw e; 60 | } 61 | } 62 | 63 | var settings = grammar.options || {}; 64 | if (!settings.moduleType) settings.moduleType = opts['module-type']; 65 | if (!settings.moduleName && name) settings.moduleName = name.replace(/-\w/g, function (match){ return match.charAt(1).toUpperCase(); }); 66 | 67 | grammar.options = settings; 68 | 69 | return RegExpLexer.generate(grammar); 70 | } 71 | 72 | function readin (cb) { 73 | var stdin = process.openStdin(), 74 | data = ''; 75 | 76 | stdin.setEncoding('utf8'); 77 | stdin.addListener('data', function (chunk) { 78 | data += chunk; 79 | }); 80 | stdin.addListener('end', function () { 81 | cb(data); 82 
| }); 83 | } 84 | 85 | if (require.main === module) 86 | exports.main(opts.parse()); 87 | -------------------------------------------------------------------------------- /examples/lex.l: -------------------------------------------------------------------------------- 1 | 2 | NAME [a-zA-Z_][a-zA-Z0-9_-]* 3 | BR \r\n|\n|\r 4 | 5 | %s indented trail rules 6 | %x code start_condition options conditions action 7 | 8 | %% 9 | 10 | "/*"(.|\n|\r)*?"*/" return 'ACTION_BODY'; 11 | "//".* return 'ACTION_BODY'; 12 | "/"[^ /]*?['"{}'][^ ]*?"/" return 'ACTION_BODY'; // regexp with braces or quotes (and no spaces) 13 | \"("\\\\"|'\"'|[^"])*\" return 'ACTION_BODY'; 14 | "'"("\\\\"|"\'"|[^'])*"'" return 'ACTION_BODY'; 15 | [/"'][^{}/"']+ return 'ACTION_BODY'; 16 | [^{}/"']+ return 'ACTION_BODY'; 17 | "{" yy.depth++; return '{' 18 | "}" yy.depth == 0 ? this.begin('trail') : yy.depth--; return '}' 19 | 20 | {NAME} return 'NAME'; 21 | ">" this.popState(); return '>'; 22 | "," return ','; 23 | "*" return '*'; 24 | 25 | {BR}+ /* */ 26 | \s+{BR}+ /* */ 27 | \s+ this.begin('indented') 28 | "%%" this.begin('code'); return '%%' 29 | [a-zA-Z0-9_]+ return 'CHARACTER_LIT' 30 | 31 | {NAME} yy.options[yytext] = true 32 | {BR}+ this.begin('INITIAL') 33 | \s+{BR}+ this.begin('INITIAL') 34 | \s+ /* empty */ 35 | 36 | {NAME} return 'START_COND' 37 | {BR}+ this.begin('INITIAL') 38 | \s+{BR}+ this.begin('INITIAL') 39 | \s+ /* empty */ 40 | 41 | .*{BR}+ this.begin('rules') 42 | 43 | "{" yy.depth = 0; this.begin('action'); return '{' 44 | "%{"(.|{BR})*?"%}" this.begin('trail'); yytext = yytext.substr(2, yytext.length-4);return 'ACTION' 45 | "%{"(.|{BR})*?"%}" yytext = yytext.substr(2, yytext.length-4); return 'ACTION' 46 | .+ this.begin('rules'); return 'ACTION' 47 | 48 | "/*"(.|\n|\r)*?"*/" /* ignore */ 49 | "//".* /* ignore */ 50 | 51 | {BR}+ /* */ 52 | \s+ /* */ 53 | {NAME} return 'NAME'; 54 | \"("\\\\"|'\"'|[^"])*\" yytext = yytext.replace(/\\"/g,'"'); return 'STRING_LIT'; 55 | 
"'"("\\\\"|"\'"|[^'])*"'" yytext = yytext.replace(/\\'/g,"'"); return 'STRING_LIT'; 56 | "|" return '|'; 57 | "["("\\\\"|"\]"|[^\]])*"]" return 'ANY_GROUP_REGEX'; 58 | "(?:" return 'SPECIAL_GROUP'; 59 | "(?=" return 'SPECIAL_GROUP'; 60 | "(?!" return 'SPECIAL_GROUP'; 61 | "(" return '('; 62 | ")" return ')'; 63 | "+" return '+'; 64 | "*" return '*'; 65 | "?" return '?'; 66 | "^" return '^'; 67 | "," return ','; 68 | "<>" return '$'; 69 | "<" this.begin('conditions'); return '<'; 70 | "/!" return '/!'; 71 | "/" return '/'; 72 | "\\"([0-7]{1,3}|[rfntvsSbBwWdD\\*+()${}|[\]\/.^?]|"c"[A-Z]|"x"[0-9A-F]{2}|"u"[a-fA-F0-9]{4}) return 'ESCAPE_CHAR'; 73 | "\\". yytext = yytext.replace(/^\\/g,''); return 'ESCAPE_CHAR'; 74 | "$" return '$'; 75 | "." return '.'; 76 | "%options" yy.options = {}; this.begin('options'); 77 | "%s" this.begin('start_condition'); return 'START_INC'; 78 | "%x" this.begin('start_condition'); return 'START_EXC'; 79 | "%%" this.begin('rules'); return '%%'; 80 | "{"\d+(","\s?\d+|",")?"}" return 'RANGE_REGEX'; 81 | "{"{NAME}"}" return 'NAME_BRACE'; 82 | "{" return '{'; 83 | "}" return '}'; 84 | . 
/* ignore bad characters */ 85 | <*><> return 'EOF'; 86 | 87 | (.|{BR})+ return 'CODE'; 88 | 89 | %% 90 | 91 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "author": "Zach Carter (http://zaa.ch)", 3 | "name": "jison-lex", 4 | "description": "lexical analyzer generator used by jison", 5 | "version": "0.3.4", 6 | "keywords": [ 7 | "jison", 8 | "parser", 9 | "generator", 10 | "lexer", 11 | "flex", 12 | "tokenizer" 13 | ], 14 | "repository": { 15 | "type": "git", 16 | "url": "git://github.com/zaach/jison-lex.git" 17 | }, 18 | "bugs": { 19 | "email": "jison@librelist.com", 20 | "url": "http://github.com/zaach/jison-lex/issues" 21 | }, 22 | "main": "regexp-lexer", 23 | "bin": "cli.js", 24 | "engines": { 25 | "node": ">=0.4" 26 | }, 27 | "dependencies": { 28 | "lex-parser": "0.1.x", 29 | "nomnom": "1.5.2" 30 | }, 31 | "devDependencies": { 32 | "test": "0.4.4" 33 | }, 34 | "scripts": { 35 | "test": "node tests/all-tests.js" 36 | }, 37 | "directories": { 38 | "lib": "lib", 39 | "tests": "tests" 40 | }, 41 | "homepage": "http://jison.org" 42 | } 43 | -------------------------------------------------------------------------------- /regexp-lexer.js: -------------------------------------------------------------------------------- 1 | // Basic Lexer implemented using JavaScript regular expressions 2 | // MIT Licensed 3 | 4 | "use strict"; 5 | 6 | var lexParser = require('lex-parser'); 7 | var version = require('./package.json').version; 8 | 9 | // expand macros and convert matchers to RegExp's 10 | function prepareRules(rules, macros, actions, tokens, startConditions, caseless) { 11 | var m,i,k,action,conditions, 12 | newRules = []; 13 | 14 | if (macros) { 15 | macros = prepareMacros(macros); 16 | } 17 | 18 | function tokenNumberReplacement (str, token) { 19 | return "return " + (tokens[token] || "'" + token + "'"); 20 | } 21 | 22 | 
actions.push('switch($avoiding_name_collisions) {'); 23 | 24 | for (i=0;i < rules.length; i++) { 25 | if (Object.prototype.toString.apply(rules[i][0]) !== '[object Array]') { 26 | // implicit add to all inclusive start conditions 27 | for (k in startConditions) { 28 | if (startConditions[k].inclusive) { 29 | startConditions[k].rules.push(i); 30 | } 31 | } 32 | } else if (rules[i][0][0] === '*') { 33 | // Add to ALL start conditions 34 | for (k in startConditions) { 35 | startConditions[k].rules.push(i); 36 | } 37 | rules[i].shift(); 38 | } else { 39 | // Add to explicit start conditions 40 | conditions = rules[i].shift(); 41 | for (k=0;k 20 ? '...':'') + past.substr(-20).replace(/\n/g, ""); 258 | }, 259 | 260 | // displays upcoming input, i.e. for error messages 261 | upcomingInput: function () { 262 | var next = this.match; 263 | if (next.length < 20) { 264 | next += this._input.substr(0, 20-next.length); 265 | } 266 | return (next.substr(0,20) + (next.length > 20 ? '...' : '')).replace(/\n/g, ""); 267 | }, 268 | 269 | // displays the character position where the lexing error occurred, i.e. 
for error messages 270 | showPosition: function () { 271 | var pre = this.pastInput(); 272 | var c = new Array(pre.length + 1).join("-"); 273 | return pre + this.upcomingInput() + "\n" + c + "^"; 274 | }, 275 | 276 | // test the lexed token: return FALSE when not a match, otherwise return token 277 | test_match: function(match, indexed_rule) { 278 | var token, 279 | lines, 280 | backup; 281 | 282 | if (this.options.backtrack_lexer) { 283 | // save context 284 | backup = { 285 | yylineno: this.yylineno, 286 | yylloc: { 287 | first_line: this.yylloc.first_line, 288 | last_line: this.last_line, 289 | first_column: this.yylloc.first_column, 290 | last_column: this.yylloc.last_column 291 | }, 292 | yytext: this.yytext, 293 | match: this.match, 294 | matches: this.matches, 295 | matched: this.matched, 296 | yyleng: this.yyleng, 297 | offset: this.offset, 298 | _more: this._more, 299 | _input: this._input, 300 | yy: this.yy, 301 | conditionStack: this.conditionStack.slice(0), 302 | done: this.done 303 | }; 304 | if (this.options.ranges) { 305 | backup.yylloc.range = this.yylloc.range.slice(0); 306 | } 307 | } 308 | 309 | lines = match[0].match(/(?:\r\n?|\n).*/g); 310 | if (lines) { 311 | this.yylineno += lines.length; 312 | } 313 | this.yylloc = { 314 | first_line: this.yylloc.last_line, 315 | last_line: this.yylineno + 1, 316 | first_column: this.yylloc.last_column, 317 | last_column: lines ? 
318 | lines[lines.length - 1].length - lines[lines.length - 1].match(/\r?\n?/)[0].length : 319 | this.yylloc.last_column + match[0].length 320 | }; 321 | this.yytext += match[0]; 322 | this.match += match[0]; 323 | this.matches = match; 324 | this.yyleng = this.yytext.length; 325 | if (this.options.ranges) { 326 | this.yylloc.range = [this.offset, this.offset += this.yyleng]; 327 | } 328 | this._more = false; 329 | this._backtrack = false; 330 | this._input = this._input.slice(match[0].length); 331 | this.matched += match[0]; 332 | token = this.performAction.call(this, this.yy, this, indexed_rule, this.conditionStack[this.conditionStack.length - 1]); 333 | if (this.done && this._input) { 334 | this.done = false; 335 | } 336 | if (token) { 337 | return token; 338 | } else if (this._backtrack) { 339 | // recover context 340 | for (var k in backup) { 341 | this[k] = backup[k]; 342 | } 343 | return false; // rule action called reject() implying the next rule should be tested instead. 344 | } 345 | return false; 346 | }, 347 | 348 | // return next match in input 349 | next: function () { 350 | if (this.done) { 351 | return this.EOF; 352 | } 353 | if (!this._input) { 354 | this.done = true; 355 | } 356 | 357 | var token, 358 | match, 359 | tempMatch, 360 | index; 361 | if (!this._more) { 362 | this.yytext = ''; 363 | this.match = ''; 364 | } 365 | var rules = this._currentRules(); 366 | for (var i = 0; i < rules.length; i++) { 367 | tempMatch = this._input.match(this.rules[rules[i]]); 368 | if (tempMatch && (!match || tempMatch[0].length > match[0].length)) { 369 | match = tempMatch; 370 | index = i; 371 | if (this.options.backtrack_lexer) { 372 | token = this.test_match(tempMatch, rules[i]); 373 | if (token !== false) { 374 | return token; 375 | } else if (this._backtrack) { 376 | match = false; 377 | continue; // rule action called reject() implying a rule MISmatch. 378 | } else { 379 | // else: this is a lexer rule which consumes input without producing a token (e.g. 
whitespace) 380 | return false; 381 | } 382 | } else if (!this.options.flex) { 383 | break; 384 | } 385 | } 386 | } 387 | if (match) { 388 | token = this.test_match(match, rules[index]); 389 | if (token !== false) { 390 | return token; 391 | } 392 | // else: this is a lexer rule which consumes input without producing a token (e.g. whitespace) 393 | return false; 394 | } 395 | if (this._input === "") { 396 | return this.EOF; 397 | } else { 398 | return this.parseError('Lexical error on line ' + (this.yylineno + 1) + '. Unrecognized text.\n' + this.showPosition(), { 399 | text: "", 400 | token: null, 401 | line: this.yylineno 402 | }); 403 | } 404 | }, 405 | 406 | // return next match that has a token 407 | lex: function lex () { 408 | var r = this.next(); 409 | if (r) { 410 | return r; 411 | } else { 412 | return this.lex(); 413 | } 414 | }, 415 | 416 | // activates a new lexer condition state (pushes the new lexer condition state onto the condition stack) 417 | begin: function begin (condition) { 418 | this.conditionStack.push(condition); 419 | }, 420 | 421 | // pop the previously active lexer condition state off the condition stack 422 | popState: function popState () { 423 | var n = this.conditionStack.length - 1; 424 | if (n > 0) { 425 | return this.conditionStack.pop(); 426 | } else { 427 | return this.conditionStack[0]; 428 | } 429 | }, 430 | 431 | // produce the lexer rule set which is active for the currently active lexer condition state 432 | _currentRules: function _currentRules () { 433 | if (this.conditionStack.length && this.conditionStack[this.conditionStack.length - 1]) { 434 | return this.conditions[this.conditionStack[this.conditionStack.length - 1]].rules; 435 | } else { 436 | return this.conditions["INITIAL"].rules; 437 | } 438 | }, 439 | 440 | // return the currently active lexer condition state; when an index argument is provided it produces the N-th previous condition state, if available 441 | topState: function topState (n) { 442 | n = 
// Assemble the final source from the processed grammar.
//
// opt - processed grammar options; `opt.moduleType` picks the output flavour:
//       'commonjs' and 'amd' use their dedicated generators, anything else
//       falls back to the plain module generator.
//
// Returns whatever the selected generator returns.
function generateFromOpts (opt) {
    switch (opt.moduleType) {
    case 'commonjs':
        return generateCommonJSModule(opt);
    case 'amd':
        return generateAMDModule(opt);
    default:
        return generateModule(opt);
    }
}
"unshifts one char (or a string) into the input", 511 | more: "When called from action, caches matched text and appends it on next action", 512 | reject: "When called from action, signals the lexer that this rule fails to match the input, so the next matching rule (regex) should be tested instead.", 513 | less: "retain first n characters of the match", 514 | pastInput: "displays already matched input, i.e. for error messages", 515 | upcomingInput: "displays upcoming input, i.e. for error messages", 516 | showPosition: "displays the character position where the lexing error occurred, i.e. for error messages", 517 | test_match: "test the lexed token: return FALSE when not a match, otherwise return token", 518 | next: "return next match in input", 519 | lex: "return next match that has a token", 520 | begin: "activates a new lexer condition state (pushes the new lexer condition state onto the condition stack)", 521 | popState: "pop the previously active lexer condition state off the condition stack", 522 | _currentRules: "produce the lexer rule set which is active for the currently active lexer condition state", 523 | topState: "return the currently active lexer condition state; when an index argument is provided it produces the N-th previous condition state, if available", 524 | pushState: "alias for begin(condition)", 525 | stateStackSize: "return the number of states currently on the stack" 526 | }; 527 | var out = "({\n"; 528 | var p = []; 529 | var descr; 530 | for (var k in RegExpLexer.prototype) { 531 | if (RegExpLexer.prototype.hasOwnProperty(k) && k.indexOf("generate") === -1) { 532 | // copy the function description as a comment before the implementation; supports multi-line descriptions 533 | descr = "\n"; 534 | if (functionDescriptions[k]) { 535 | descr += "// " + functionDescriptions[k].replace(/\n/g, "\n\/\/ ") + "\n"; 536 | } 537 | p.push(descr + k + ":" + (RegExpLexer.prototype[k].toString() || '""')); 538 | } 539 | } 540 | out += p.join(",\n"); 541 | 
// Generate the lexer as a CommonJS module.
//
// opt - processed grammar options (optional); `opt.moduleName` names the
//       variable holding the lexer and defaults to "lexer".
//
// Returns the plain module source from generateModule followed by two
// exports: `exports.lexer` (the lexer object) and `exports.lex` (a wrapper
// that calls the lexer's lex() with the lexer as `this`).
function generateCommonJSModule(opt) {
    opt = opt || {};

    var moduleName = opt.moduleName || "lexer";
    var out = generateModule(opt);

    out += "\nexports.lexer = " + moduleName;
    // Bind to the module variable itself. The wrapper used to apply lex() to
    // the identifier `lexer`, which only exists at this scope when the module
    // name happens to be "lexer"; any other name made exports.lex() throw.
    out += ";\nexports.lex = function () { return " + moduleName + ".lex.apply(" + moduleName + ", arguments); };";
    return out;
}
-------------------------------------------------------------------------------- /tests/regexplexer.js: -------------------------------------------------------------------------------- 1 | var RegExpLexer = require("../regexp-lexer"), 2 | assert = require("assert"); 3 | 4 | exports["test basic matchers"] = function() { 5 | var dict = { 6 | rules: [ 7 | ["x", "return 'X';" ], 8 | ["y", "return 'Y';" ], 9 | ["$", "return 'EOF';" ] 10 | ] 11 | }; 12 | 13 | var input = "xxyx"; 14 | 15 | var lexer = new RegExpLexer(dict, input); 16 | assert.equal(lexer.lex(), "X"); 17 | assert.equal(lexer.lex(), "X"); 18 | assert.equal(lexer.lex(), "Y"); 19 | assert.equal(lexer.lex(), "X"); 20 | assert.equal(lexer.lex(), "EOF"); 21 | }; 22 | 23 | exports["test set yy"] = function() { 24 | var dict = { 25 | rules: [ 26 | ["x", "return yy.x;" ], 27 | ["y", "return 'Y';" ], 28 | ["$", "return 'EOF';" ] 29 | ] 30 | }; 31 | 32 | var input = "xxyx"; 33 | 34 | var lexer = new RegExpLexer(dict); 35 | lexer.setInput(input, { x: 'EX' }); 36 | assert.equal(lexer.lex(), "EX"); 37 | }; 38 | 39 | exports["test set input after"] = function() { 40 | var dict = { 41 | rules: [ 42 | ["x", "return 'X';" ], 43 | ["y", "return 'Y';" ], 44 | ["$", "return 'EOF';" ] 45 | ] 46 | }; 47 | 48 | var input = "xxyx"; 49 | 50 | var lexer = new RegExpLexer(dict); 51 | lexer.setInput(input); 52 | 53 | assert.equal(lexer.lex(), "X"); 54 | assert.equal(lexer.lex(), "X"); 55 | assert.equal(lexer.lex(), "Y"); 56 | assert.equal(lexer.lex(), "X"); 57 | assert.equal(lexer.lex(), "EOF"); 58 | }; 59 | 60 | exports["test unrecognized char"] = function() { 61 | var dict = { 62 | rules: [ 63 | ["x", "return 'X';" ], 64 | ["y", "return 'Y';" ], 65 | ["$", "return 'EOF';" ] 66 | ] 67 | }; 68 | 69 | var input = "xa"; 70 | 71 | var lexer = new RegExpLexer(dict, input); 72 | assert.equal(lexer.lex(), "X"); 73 | assert.throws(function(){lexer.lex()}, "bad char"); 74 | }; 75 | 76 | exports["test macro"] = function() { 77 | var dict = { 
78 | macros: { 79 | "digit": "[0-9]" 80 | }, 81 | rules: [ 82 | ["x", "return 'X';" ], 83 | ["y", "return 'Y';" ], 84 | ["{digit}+", "return 'NAT';" ], 85 | ["$", "return 'EOF';" ] 86 | ] 87 | }; 88 | 89 | var input = "x12234y42"; 90 | 91 | var lexer = new RegExpLexer(dict, input); 92 | assert.equal(lexer.lex(), "X"); 93 | assert.equal(lexer.lex(), "NAT"); 94 | assert.equal(lexer.lex(), "Y"); 95 | assert.equal(lexer.lex(), "NAT"); 96 | assert.equal(lexer.lex(), "EOF"); 97 | }; 98 | 99 | exports["test macro precedence"] = function() { 100 | var dict = { 101 | macros: { 102 | "hex": "[0-9]|[a-f]" 103 | }, 104 | rules: [ 105 | ["-", "return '-';" ], 106 | ["{hex}+", "return 'HEX';" ], 107 | ["$", "return 'EOF';" ] 108 | ] 109 | }; 110 | 111 | var input = "129-abfe-42dc-ea12"; 112 | 113 | var lexer = new RegExpLexer(dict, input); 114 | assert.equal(lexer.lex(), "HEX"); 115 | assert.equal(lexer.lex(), "-"); 116 | assert.equal(lexer.lex(), "HEX"); 117 | assert.equal(lexer.lex(), "-"); 118 | assert.equal(lexer.lex(), "HEX"); 119 | assert.equal(lexer.lex(), "-"); 120 | assert.equal(lexer.lex(), "HEX"); 121 | assert.equal(lexer.lex(), "EOF"); 122 | }; 123 | 124 | exports["test nested macros"] = function () { 125 | var dict = { 126 | macros: { 127 | "digit": "[0-9]", 128 | "2digit": "{digit}{digit}", 129 | "3digit": "{2digit}{digit}" 130 | }, 131 | rules: [ 132 | ["x", "return 'X';" ], 133 | ["y", "return 'Y';" ], 134 | ["{3digit}", "return 'NNN';" ], 135 | ["{2digit}", "return 'NN';" ], 136 | ["{digit}", "return 'N';" ], 137 | ["$", "return 'EOF';" ] 138 | ] 139 | }; 140 | 141 | var input = "x1y42y123"; 142 | 143 | var lexer = new RegExpLexer(dict, input); 144 | assert.equal(lexer.lex(), "X"); 145 | assert.equal(lexer.lex(), "N"); 146 | assert.equal(lexer.lex(), "Y"); 147 | assert.equal(lexer.lex(), "NN"); 148 | assert.equal(lexer.lex(), "Y"); 149 | assert.equal(lexer.lex(), "NNN"); 150 | assert.equal(lexer.lex(), "EOF"); 151 | }; 152 | 153 | exports["test nested macro 
precedence"] = function() { 154 | var dict = { 155 | macros: { 156 | "hex": "[0-9]|[a-f]", 157 | "col": "#{hex}+" 158 | }, 159 | rules: [ 160 | ["-", "return '-';" ], 161 | ["{col}", "return 'HEX';" ], 162 | ["$", "return 'EOF';" ] 163 | ] 164 | }; 165 | 166 | var input = "#129-#abfe-#42dc-#ea12"; 167 | 168 | var lexer = new RegExpLexer(dict, input); 169 | assert.equal(lexer.lex(), "HEX"); 170 | assert.equal(lexer.lex(), "-"); 171 | assert.equal(lexer.lex(), "HEX"); 172 | assert.equal(lexer.lex(), "-"); 173 | assert.equal(lexer.lex(), "HEX"); 174 | assert.equal(lexer.lex(), "-"); 175 | assert.equal(lexer.lex(), "HEX"); 176 | assert.equal(lexer.lex(), "EOF"); 177 | }; 178 | 179 | exports["test action include"] = function() { 180 | var dict = { 181 | rules: [ 182 | ["x", "return included ? 'Y' : 'N';" ], 183 | ["$", "return 'EOF';" ] 184 | ], 185 | actionInclude: "var included = true;" 186 | }; 187 | 188 | var input = "x"; 189 | 190 | var lexer = new RegExpLexer(dict, input); 191 | assert.equal(lexer.lex(), "Y"); 192 | assert.equal(lexer.lex(), "EOF"); 193 | }; 194 | 195 | exports["test ignored"] = function() { 196 | var dict = { 197 | rules: [ 198 | ["x", "return 'X';" ], 199 | ["y", "return 'Y';" ], 200 | ["\\s+", "/* skip whitespace */" ], 201 | ["$", "return 'EOF';" ] 202 | ] 203 | }; 204 | 205 | var input = "x x y x"; 206 | 207 | var lexer = new RegExpLexer(dict, input); 208 | assert.equal(lexer.lex(), "X"); 209 | assert.equal(lexer.lex(), "X"); 210 | assert.equal(lexer.lex(), "Y"); 211 | assert.equal(lexer.lex(), "X"); 212 | assert.equal(lexer.lex(), "EOF"); 213 | }; 214 | 215 | exports["test disambiguate"] = function() { 216 | var dict = { 217 | rules: [ 218 | ["for\\b", "return 'FOR';" ], 219 | ["if\\b", "return 'IF';" ], 220 | ["[a-z]+", "return 'IDENTIFIER';" ], 221 | ["\\s+", "/* skip whitespace */" ], 222 | ["$", "return 'EOF';" ] 223 | ] 224 | }; 225 | 226 | var input = "if forever for for"; 227 | 228 | var lexer = new RegExpLexer(dict, input); 229 | 
assert.equal(lexer.lex(), "IF"); 230 | assert.equal(lexer.lex(), "IDENTIFIER"); 231 | assert.equal(lexer.lex(), "FOR"); 232 | assert.equal(lexer.lex(), "FOR"); 233 | assert.equal(lexer.lex(), "EOF"); 234 | }; 235 | 236 | exports["test yytext overwrite"] = function() { 237 | var dict = { 238 | rules: [ 239 | ["x", "yytext = 'hi der'; return 'X';" ] 240 | ] 241 | }; 242 | 243 | var input = "x"; 244 | 245 | var lexer = new RegExpLexer(dict, input); 246 | lexer.lex(); 247 | assert.equal(lexer.yytext, "hi der"); 248 | }; 249 | 250 | exports["test yylineno"] = function() { 251 | var dict = { 252 | rules: [ 253 | ["\\s+", "/* skip whitespace */" ], 254 | ["x", "return 'x';" ], 255 | ["y", "return 'y';" ] 256 | ] 257 | }; 258 | 259 | var input = "x\nxy\n\n\nx"; 260 | 261 | var lexer = new RegExpLexer(dict, input); 262 | assert.equal(lexer.yylineno, 0); 263 | assert.equal(lexer.lex(), "x"); 264 | assert.equal(lexer.lex(), "x"); 265 | assert.equal(lexer.yylineno, 1); 266 | assert.equal(lexer.lex(), "y"); 267 | assert.equal(lexer.yylineno, 1); 268 | assert.equal(lexer.lex(), "x"); 269 | assert.equal(lexer.yylineno, 4); 270 | }; 271 | 272 | exports["test yylloc"] = function() { 273 | var dict = { 274 | rules: [ 275 | ["\\s+", "/* skip whitespace */" ], 276 | ["x", "return 'x';" ], 277 | ["y", "return 'y';" ] 278 | ] 279 | }; 280 | 281 | var input = "x\nxy\n\n\nx"; 282 | 283 | var lexer = new RegExpLexer(dict, input); 284 | assert.equal(lexer.lex(), "x"); 285 | assert.equal(lexer.yylloc.first_column, 0); 286 | assert.equal(lexer.yylloc.last_column, 1); 287 | assert.equal(lexer.lex(), "x"); 288 | assert.equal(lexer.yylloc.first_line, 2); 289 | assert.equal(lexer.yylloc.last_line, 2); 290 | assert.equal(lexer.yylloc.first_column, 0); 291 | assert.equal(lexer.yylloc.last_column, 1); 292 | assert.equal(lexer.lex(), "y"); 293 | assert.equal(lexer.yylloc.first_line, 2); 294 | assert.equal(lexer.yylloc.last_line, 2); 295 | assert.equal(lexer.yylloc.first_column, 1); 296 | 
assert.equal(lexer.yylloc.last_column, 2); 297 | assert.equal(lexer.lex(), "x"); 298 | assert.equal(lexer.yylloc.first_line, 5); 299 | assert.equal(lexer.yylloc.last_line, 5); 300 | assert.equal(lexer.yylloc.first_column, 0); 301 | assert.equal(lexer.yylloc.last_column, 1); 302 | }; 303 | 304 | exports["test more()"] = function() { 305 | var dict = { 306 | rules: [ 307 | ["x", "return 'X';" ], 308 | ['"[^"]*', function(){ 309 | if(yytext.charAt(yyleng-1) == '\\') { 310 | this.more(); 311 | } else { 312 | yytext += this.input(); // swallow end quote 313 | return "STRING"; 314 | } 315 | } ], 316 | ["$", "return 'EOF';" ] 317 | ] 318 | }; 319 | 320 | var input = 'x"fgjdrtj\\"sdfsdf"x'; 321 | 322 | var lexer = new RegExpLexer(dict, input); 323 | assert.equal(lexer.lex(), "X"); 324 | assert.equal(lexer.lex(), "STRING"); 325 | assert.equal(lexer.lex(), "X"); 326 | assert.equal(lexer.lex(), "EOF"); 327 | }; 328 | 329 | exports["test defined token returns"] = function() { 330 | var tokens = {"2":"X", "3":"Y", "4":"EOF"}; 331 | var dict = { 332 | rules: [ 333 | ["x", "return 'X';" ], 334 | ["y", "return 'Y';" ], 335 | ["$", "return 'EOF';" ] 336 | ] 337 | }; 338 | 339 | var input = "xxyx"; 340 | 341 | var lexer = new RegExpLexer(dict, input, tokens); 342 | 343 | assert.equal(lexer.lex(), 2); 344 | assert.equal(lexer.lex(), 2); 345 | assert.equal(lexer.lex(), 3); 346 | assert.equal(lexer.lex(), 2); 347 | assert.equal(lexer.lex(), 4); 348 | }; 349 | 350 | exports["test module generator from constructor"] = function() { 351 | var dict = { 352 | rules: [ 353 | ["x", "return 'X';" ], 354 | ["y", "return 'Y';" ], 355 | ["$", "return 'EOF';" ] 356 | ] 357 | }; 358 | 359 | var input = "xxyx"; 360 | 361 | var lexerSource = RegExpLexer.generate(dict); 362 | eval(lexerSource); 363 | lexer.setInput(input); 364 | 365 | assert.equal(lexer.lex(), "X"); 366 | assert.equal(lexer.lex(), "X"); 367 | assert.equal(lexer.lex(), "Y"); 368 | assert.equal(lexer.lex(), "X"); 369 | 
assert.equal(lexer.lex(), "EOF"); 370 | }; 371 | 372 | exports["test module generator"] = function() { 373 | var dict = { 374 | rules: [ 375 | ["x", "return 'X';" ], 376 | ["y", "return 'Y';" ], 377 | ["$", "return 'EOF';" ] 378 | ] 379 | }; 380 | 381 | var input = "xxyx"; 382 | 383 | var lexer_ = new RegExpLexer(dict); 384 | var lexerSource = lexer_.generateModule(); 385 | eval(lexerSource); 386 | lexer.setInput(input); 387 | 388 | assert.equal(lexer.lex(), "X"); 389 | assert.equal(lexer.lex(), "X"); 390 | assert.equal(lexer.lex(), "Y"); 391 | assert.equal(lexer.lex(), "X"); 392 | assert.equal(lexer.lex(), "EOF"); 393 | }; 394 | 395 | exports["test generator with more complex lexer"] = function() { 396 | var dict = { 397 | rules: [ 398 | ["x", "return 'X';" ], 399 | ['"[^"]*', function(){ 400 | if(yytext.charAt(yyleng-1) == '\\') { 401 | this.more(); 402 | } else { 403 | yytext += this.input(); // swallow end quote 404 | return "STRING"; 405 | } 406 | } ], 407 | ["$", "return 'EOF';" ] 408 | ] 409 | }; 410 | 411 | var input = 'x"fgjdrtj\\"sdfsdf"x'; 412 | 413 | var lexer_ = new RegExpLexer(dict); 414 | var lexerSource = lexer_.generateModule(); 415 | eval(lexerSource); 416 | lexer.setInput(input); 417 | 418 | assert.equal(lexer.lex(), "X"); 419 | assert.equal(lexer.lex(), "STRING"); 420 | assert.equal(lexer.lex(), "X"); 421 | assert.equal(lexer.lex(), "EOF"); 422 | }; 423 | 424 | exports["test commonjs module generator"] = function() { 425 | var dict = { 426 | rules: [ 427 | ["x", "return 'X';" ], 428 | ["y", "return 'Y';" ], 429 | ["$", "return 'EOF';" ] 430 | ] 431 | }; 432 | 433 | var input = "xxyx"; 434 | 435 | var lexer_ = new RegExpLexer(dict); 436 | var lexerSource = lexer_.generateCommonJSModule(); 437 | var exports = {}; 438 | eval(lexerSource); 439 | exports.lexer.setInput(input); 440 | 441 | assert.equal(exports.lex(), "X"); 442 | assert.equal(exports.lex(), "X"); 443 | assert.equal(exports.lex(), "Y"); 444 | assert.equal(exports.lex(), "X"); 445 | 
assert.equal(exports.lex(), "EOF"); 446 | }; 447 | 448 | exports["test amd module generator"] = function() { 449 | var dict = { 450 | rules: [ 451 | ["x", "return 'X';" ], 452 | ["y", "return 'Y';" ], 453 | ["$", "return 'EOF';" ] 454 | ] 455 | }; 456 | 457 | var input = "xxyx"; 458 | 459 | var lexer_ = new RegExpLexer(dict); 460 | var lexerSource = lexer_.generateAMDModule(); 461 | 462 | var lexer; 463 | var define = function (_, fn) { 464 | lexer = fn(); 465 | }; 466 | 467 | eval(lexerSource); 468 | lexer.setInput(input); 469 | 470 | assert.equal(lexer.lex(), "X"); 471 | assert.equal(lexer.lex(), "X"); 472 | assert.equal(lexer.lex(), "Y"); 473 | assert.equal(lexer.lex(), "X"); 474 | assert.equal(lexer.lex(), "EOF"); 475 | }; 476 | 477 | exports["test DJ lexer"] = function() { 478 | var dict = { 479 | "lex": { 480 | "macros": { 481 | "digit": "[0-9]", 482 | "id": "[a-zA-Z][a-zA-Z0-9]*" 483 | }, 484 | 485 | "rules": [ 486 | ["\\/\\/.*", "/* ignore comment */"], 487 | ["main\\b", "return 'MAIN';"], 488 | ["class\\b", "return 'CLASS';"], 489 | ["extends\\b", "return 'EXTENDS';"], 490 | ["nat\\b", "return 'NATTYPE';"], 491 | ["if\\b", "return 'IF';"], 492 | ["else\\b", "return 'ELSE';"], 493 | ["for\\b", "return 'FOR';"], 494 | ["printNat\\b", "return 'PRINTNAT';"], 495 | ["readNat\\b", "return 'READNAT';"], 496 | ["this\\b", "return 'THIS';"], 497 | ["new\\b", "return 'NEW';"], 498 | ["var\\b", "return 'VAR';"], 499 | ["null\\b", "return 'NUL';"], 500 | ["{digit}+", "return 'NATLITERAL';"], 501 | ["{id}", "return 'ID';"], 502 | ["==", "return 'EQUALITY';"], 503 | ["=", "return 'ASSIGN';"], 504 | ["\\+", "return 'PLUS';"], 505 | ["-", "return 'MINUS';"], 506 | ["\\*", "return 'TIMES';"], 507 | [">", "return 'GREATER';"], 508 | ["\\|\\|", "return 'OR';"], 509 | ["!", "return 'NOT';"], 510 | ["\\.", "return 'DOT';"], 511 | ["\\{", "return 'LBRACE';"], 512 | ["\\}", "return 'RBRACE';"], 513 | ["\\(", "return 'LPAREN';"], 514 | ["\\)", "return 'RPAREN';"], 515 | [";", 
"return 'SEMICOLON';"], 516 | ["\\s+", "/* skip whitespace */"], 517 | [".", "print('Illegal character');throw 'Illegal character';"], 518 | ["$", "return 'ENDOFFILE';"] 519 | ] 520 | } 521 | }; 522 | 523 | var input = "class Node extends Object { \ 524 | var nat value var nat value;\ 525 | var Node next;\ 526 | var nat index;\ 527 | }\ 528 | \ 529 | class List extends Object {\ 530 | var Node start;\ 531 | \ 532 | Node prepend(Node startNode) {\ 533 | startNode.next = start;\ 534 | start = startNode;\ 535 | }\ 536 | \ 537 | nat find(nat index) {\ 538 | var nat value;\ 539 | var Node node;\ 540 | \ 541 | for(node = start;!(node == null);node = node.next){\ 542 | if(node.index == index){\ 543 | value = node.value;\ 544 | } else { 0; };\ 545 | };\ 546 | \ 547 | value;\ 548 | }\ 549 | }\ 550 | \ 551 | main {\ 552 | var nat index;\ 553 | var nat value;\ 554 | var List list;\ 555 | var Node startNode;\ 556 | \ 557 | index = readNat();\ 558 | list = new List;\ 559 | \ 560 | for(0;!(index==0);0){\ 561 | value = readNat();\ 562 | startNode = new Node;\ 563 | startNode.index = index;\ 564 | startNode.value = value;\ 565 | list.prepend(startNode);\ 566 | index = readNat();\ 567 | };\ 568 | \ 569 | index = readNat();\ 570 | \ 571 | for(0;!(index==0);0){\ 572 | printNat(list.find(index));\ 573 | index = readNat();\ 574 | };\ 575 | }"; 576 | 577 | var lexer = new RegExpLexer(dict.lex); 578 | lexer.setInput(input); 579 | var tok; 580 | while (tok = lexer.lex(), tok!==1) { 581 | assert.equal(typeof tok, "string"); 582 | } 583 | }; 584 | 585 | exports["test instantiation from string"] = function() { 586 | var dict = "%%\n'x' {return 'X';}\n'y' {return 'Y';}\n<> {return 'EOF';}"; 587 | 588 | var input = "x"; 589 | 590 | var lexer = new RegExpLexer(dict); 591 | lexer.setInput(input); 592 | 593 | assert.equal(lexer.lex(), "X"); 594 | assert.equal(lexer.lex(), "EOF"); 595 | }; 596 | 597 | exports["test inclusive start conditions"] = function() { 598 | var dict = { 599 | 
startConditions: { 600 | "TEST": 0, 601 | }, 602 | rules: [ 603 | ["enter-test", "this.begin('TEST');" ], 604 | [["TEST"], "x", "return 'T';" ], 605 | [["TEST"], "y", "this.begin('INITIAL'); return 'TY';" ], 606 | ["x", "return 'X';" ], 607 | ["y", "return 'Y';" ], 608 | ["$", "return 'EOF';" ] 609 | ] 610 | }; 611 | var input = "xenter-testxyy"; 612 | 613 | var lexer = new RegExpLexer(dict); 614 | lexer.setInput(input); 615 | 616 | assert.equal(lexer.lex(), "X"); 617 | assert.equal(lexer.lex(), "T"); 618 | assert.equal(lexer.lex(), "TY"); 619 | assert.equal(lexer.lex(), "Y"); 620 | assert.equal(lexer.lex(), "EOF"); 621 | }; 622 | 623 | exports["test exclusive start conditions"] = function() { 624 | var dict = { 625 | startConditions: { 626 | "EAT": 1, 627 | }, 628 | rules: [ 629 | ["\\/\\/", "this.begin('EAT');" ], 630 | [["EAT"], ".", "" ], 631 | [["EAT"], "\\n", "this.begin('INITIAL');" ], 632 | ["x", "return 'X';" ], 633 | ["y", "return 'Y';" ], 634 | ["$", "return 'EOF';" ] 635 | ] 636 | }; 637 | var input = "xy//yxteadh//ste\ny"; 638 | 639 | var lexer = new RegExpLexer(dict); 640 | lexer.setInput(input); 641 | 642 | assert.equal(lexer.lex(), "X"); 643 | assert.equal(lexer.lex(), "Y"); 644 | assert.equal(lexer.lex(), "Y"); 645 | assert.equal(lexer.lex(), "EOF"); 646 | }; 647 | 648 | exports["test pop start condition stack"] = function() { 649 | var dict = { 650 | startConditions: { 651 | "EAT": 1, 652 | }, 653 | rules: [ 654 | ["\\/\\/", "this.begin('EAT');" ], 655 | [["EAT"], ".", "" ], 656 | [["EAT"], "\\n", "this.popState();" ], 657 | ["x", "return 'X';" ], 658 | ["y", "return 'Y';" ], 659 | ["$", "return 'EOF';" ] 660 | ] 661 | }; 662 | var input = "xy//yxteadh//ste\ny"; 663 | 664 | var lexer = new RegExpLexer(dict); 665 | lexer.setInput(input); 666 | 667 | assert.equal(lexer.lex(), "X"); 668 | assert.equal(lexer.lex(), "Y"); 669 | assert.equal(lexer.lex(), "Y"); 670 | assert.equal(lexer.lex(), "EOF"); 671 | }; 672 | 673 | 674 | exports["test star start 
condition"] = function() { 675 | var dict = { 676 | startConditions: { 677 | "EAT": 1, 678 | }, 679 | rules: [ 680 | ["\\/\\/", "this.begin('EAT');" ], 681 | [["EAT"], ".", "" ], 682 | ["x", "return 'X';" ], 683 | ["y", "return 'Y';" ], 684 | [["*"],"$", "return 'EOF';" ] 685 | ] 686 | }; 687 | var input = "xy//yxteadh//stey"; 688 | 689 | var lexer = new RegExpLexer(dict); 690 | lexer.setInput(input); 691 | 692 | assert.equal(lexer.lex(), "X"); 693 | assert.equal(lexer.lex(), "Y"); 694 | assert.equal(lexer.lex(), "EOF"); 695 | }; 696 | 697 | exports["test start condition constants"] = function() { 698 | var dict = { 699 | startConditions: { 700 | "EAT": 1, 701 | }, 702 | rules: [ 703 | ["\\/\\/", "this.begin('EAT');" ], 704 | [["EAT"], ".", "if (YYSTATE==='EAT') return 'E';" ], 705 | ["x", "if (YY_START==='INITIAL') return 'X';" ], 706 | ["y", "return 'Y';" ], 707 | [["*"],"$", "return 'EOF';" ] 708 | ] 709 | }; 710 | var input = "xy//y"; 711 | 712 | var lexer = new RegExpLexer(dict); 713 | lexer.setInput(input); 714 | 715 | assert.equal(lexer.lex(), "X"); 716 | assert.equal(lexer.lex(), "Y"); 717 | assert.equal(lexer.lex(), "E"); 718 | assert.equal(lexer.lex(), "EOF"); 719 | }; 720 | 721 | exports["test unicode encoding"] = function() { 722 | var dict = { 723 | rules: [ 724 | ["\\u2713", "return 'CHECK';" ], 725 | ["\\u03c0", "return 'PI';" ], 726 | ["y", "return 'Y';" ] 727 | ] 728 | }; 729 | var input = "\u2713\u03c0y"; 730 | 731 | var lexer = new RegExpLexer(dict); 732 | lexer.setInput(input); 733 | 734 | assert.equal(lexer.lex(), "CHECK"); 735 | assert.equal(lexer.lex(), "PI"); 736 | assert.equal(lexer.lex(), "Y"); 737 | }; 738 | 739 | exports["test unicode"] = function() { 740 | var dict = { 741 | rules: [ 742 | ["π", "return 'PI';" ], 743 | ["y", "return 'Y';" ] 744 | ] 745 | }; 746 | var input = "πy"; 747 | 748 | var lexer = new RegExpLexer(dict); 749 | lexer.setInput(input); 750 | 751 | assert.equal(lexer.lex(), "PI"); 752 | assert.equal(lexer.lex(), "Y"); 
753 | }; 754 | 755 | exports["test longest match returns"] = function() { 756 | var dict = { 757 | rules: [ 758 | [".", "return 'DOT';" ], 759 | ["cat", "return 'CAT';" ] 760 | ], 761 | options: {flex: true} 762 | }; 763 | var input = "cat!"; 764 | 765 | var lexer = new RegExpLexer(dict); 766 | lexer.setInput(input); 767 | 768 | assert.equal(lexer.lex(), "CAT"); 769 | assert.equal(lexer.lex(), "DOT"); 770 | }; 771 | 772 | exports["test case insensitivity"] = function() { 773 | var dict = { 774 | rules: [ 775 | ["cat", "return 'CAT';" ] 776 | ], 777 | options: {'case-insensitive': true} 778 | }; 779 | var input = "Cat"; 780 | 781 | var lexer = new RegExpLexer(dict); 782 | lexer.setInput(input); 783 | 784 | assert.equal(lexer.lex(), "CAT"); 785 | }; 786 | 787 | exports["test less"] = function() { 788 | var dict = { 789 | rules: [ 790 | ["cat", "this.less(2); return 'CAT';" ], 791 | ["t", "return 'T';" ] 792 | ], 793 | }; 794 | var input = "cat"; 795 | 796 | var lexer = new RegExpLexer(dict); 797 | lexer.setInput(input); 798 | 799 | assert.equal(lexer.lex(), "CAT"); 800 | assert.equal(lexer.lex(), "T"); 801 | }; 802 | 803 | exports["test EOF unput"] = function() { 804 | var dict = { 805 | startConditions: { 806 | "UN": 1, 807 | }, 808 | rules: [ 809 | ["U", "this.begin('UN');return 'U';" ], 810 | [["UN"],"$", "this.unput('X')" ], 811 | [["UN"],"X", "this.popState();return 'X';" ], 812 | ["$", "return 'EOF'" ] 813 | ] 814 | }; 815 | var input = "U"; 816 | 817 | var lexer = new RegExpLexer(dict); 818 | lexer.setInput(input); 819 | 820 | assert.equal(lexer.lex(), "U"); 821 | assert.equal(lexer.lex(), "X"); 822 | assert.equal(lexer.lex(), "EOF"); 823 | }; 824 | 825 | exports["test flex mode default rule"] = function() { 826 | var dict = { 827 | rules: [ 828 | ["x", "return 'X';" ] 829 | ], 830 | options: {flex: true} 831 | }; 832 | var input = "xyx"; 833 | 834 | var lexer = new RegExpLexer(dict); 835 | lexer.setInput(input); 836 | 837 | assert.equal(lexer.lex(), "X"); 838 
| assert.equal(lexer.lex(), "X"); 839 | }; 840 | 841 | exports["test pipe precedence"] = function() { 842 | var dict = { 843 | rules: [ 844 | ["x|y", "return 'X_Y';" ], 845 | [".", "return 'N';"] 846 | ] 847 | }; 848 | var input = "xny"; 849 | 850 | var lexer = new RegExpLexer(dict); 851 | lexer.setInput(input); 852 | 853 | assert.equal(lexer.lex(), "X_Y"); 854 | assert.equal(lexer.lex(), "N"); 855 | assert.equal(lexer.lex(), "X_Y"); 856 | }; 857 | 858 | exports["test ranges"] = function() { 859 | var dict = { 860 | rules: [ 861 | ["x+", "return 'X';" ], 862 | [".", "return 'N';"] 863 | ], 864 | options: {ranges: true} 865 | }; 866 | var input = "xxxyy"; 867 | 868 | var lexer = new RegExpLexer(dict); 869 | lexer.setInput(input); 870 | 871 | assert.equal(lexer.lex(), "X"); 872 | assert.deepEqual(lexer.yylloc.range, [0, 3]); 873 | }; 874 | 875 | exports["test unput location"] = function() { 876 | var dict = { 877 | rules: [ 878 | ["x+", "return 'X';" ], 879 | ["y\\n", "this.unput('\\n'); return 'Y';" ], 880 | ["\\ny", "this.unput('y'); return 'BR';" ], 881 | ["y", "return 'Y';" ], 882 | [".", "return 'N';"] 883 | ], 884 | options: {ranges: true} 885 | }; 886 | var input = "xxxy\ny"; 887 | 888 | var lexer = new RegExpLexer(dict); 889 | lexer.setInput(input); 890 | console.log(lexer.rules); 891 | 892 | assert.equal(lexer.next(), "X"); 893 | assert.deepEqual(lexer.yylloc, {first_line: 1, 894 | first_column: 0, 895 | last_line: 1, 896 | last_column: 3, 897 | range: [0, 3]}); 898 | assert.equal(lexer.next(), "Y"); 899 | assert.deepEqual(lexer.yylloc, {first_line: 1, 900 | first_column: 3, 901 | last_line: 1, 902 | last_column: 4, 903 | range: [3, 4]}); 904 | assert.equal(lexer.next(), "BR"); 905 | assert.deepEqual(lexer.yylloc, {first_line: 1, 906 | first_column: 4, 907 | last_line: 2, 908 | last_column: 0, 909 | range: [4, 5]}); 910 | assert.equal(lexer.next(), "Y"); 911 | assert.deepEqual(lexer.yylloc, {first_line: 2, 912 | first_column: 0, 913 | last_line: 2, 914 | 
last_column: 1, 915 | range: [5, 6]}); 916 | 917 | }; 918 | 919 | exports["test unput location again"] = function() { 920 | var dict = { 921 | rules: [ 922 | ["x+", "return 'X';" ], 923 | ["y\\ny\\n", "this.unput('\\n'); return 'YY';" ], 924 | ["\\ny", "this.unput('y'); return 'BR';" ], 925 | ["y", "return 'Y';" ], 926 | [".", "return 'N';"] 927 | ], 928 | options: {ranges: true} 929 | }; 930 | var input = "xxxy\ny\ny"; 931 | 932 | var lexer = new RegExpLexer(dict); 933 | lexer.setInput(input); 934 | console.log(lexer.rules); 935 | 936 | assert.equal(lexer.next(), "X"); 937 | assert.deepEqual(lexer.yylloc, {first_line: 1, 938 | first_column: 0, 939 | last_line: 1, 940 | last_column: 3, 941 | range: [0, 3]}); 942 | assert.equal(lexer.next(), "YY"); 943 | assert.deepEqual(lexer.yylloc, {first_line: 1, 944 | first_column: 3, 945 | last_line: 2, 946 | last_column: 1, 947 | range: [3, 6]}); 948 | assert.equal(lexer.next(), "BR"); 949 | assert.deepEqual(lexer.yylloc, {first_line: 2, 950 | first_column: 1, 951 | last_line: 3, 952 | last_column: 0, 953 | range: [6, 7]}); 954 | assert.equal(lexer.next(), "Y"); 955 | assert.deepEqual(lexer.yylloc, {first_line: 3, 956 | first_column: 0, 957 | last_line: 3, 958 | last_column: 1, 959 | range: [7, 8]}); 960 | 961 | }; 962 | 963 | exports["test backtracking lexer reject() method"] = function() { 964 | var dict = { 965 | rules: [ 966 | ["[A-Z]+([0-9]+)", "if (this.matches[1].length) this.reject(); else return 'ID';" ], 967 | ["[A-Z]+", "return 'WORD';" ], 968 | ["[0-9]+", "return 'NUM';" ] 969 | ], 970 | options: {backtrack_lexer: true} 971 | }; 972 | var input = "A5"; 973 | 974 | var lexer = new RegExpLexer(dict); 975 | lexer.setInput(input); 976 | 977 | assert.equal(lexer.lex(), "WORD"); 978 | assert.equal(lexer.lex(), "NUM"); 979 | }; 980 | 981 | exports["test lexer reject() exception when not in backtracking mode"] = function() { 982 | var dict = { 983 | rules: [ 984 | ["[A-Z]+([0-9]+)", "if (this.matches[1].length) 
this.reject(); else return 'ID';" ], 985 | ["[A-Z]+", "return 'WORD';" ], 986 | ["[0-9]+", "return 'NUM';" ] 987 | ], 988 | options: {backtrack_lexer: false} 989 | }; 990 | var input = "A5"; 991 | 992 | var lexer = new RegExpLexer(dict); 993 | lexer.setInput(input); 994 | 995 | assert.throws(function() { 996 | lexer.lex(); 997 | }, 998 | function(err) { 999 | return (err instanceof Error) && /You can only invoke reject/.test(err); 1000 | }); 1001 | }; 1002 | 1003 | exports["test yytext state after unput"] = function() { 1004 | var dict = { 1005 | rules: [ 1006 | ["cat4", "this.unput('4'); return 'CAT';" ], 1007 | ["4", "return 'NUMBER';" ], 1008 | ["$", "return 'EOF';"] 1009 | ] 1010 | }; 1011 | 1012 | var input = "cat4"; 1013 | 1014 | var lexer = new RegExpLexer(dict); 1015 | lexer.setInput(input); 1016 | assert.equal(lexer.lex(), "CAT"); 1017 | /*the yytext should be 'cat' since we unput '4' from 'cat4' */ 1018 | assert.equal(lexer.yytext, "cat"); 1019 | assert.equal(lexer.lex(), "NUMBER"); 1020 | assert.equal(lexer.lex(), "EOF"); 1021 | }; 1022 | --------------------------------------------------------------------------------