├── .gitignore ├── cmd └── cmd.js ├── examples ├── Expression │ ├── ExpAstClasses.js │ ├── expeval.js │ ├── expression.jacobgram │ ├── expressionast.jacobgram │ └── testexpast.js └── basictokens.jacoblex ├── gruntfile.js ├── index.js ├── lib ├── automata.js ├── lexer.js ├── parser.js ├── parser │ ├── JacobGram.js │ ├── JacobGramInterpreter.js │ ├── JacobGramLexer.js │ ├── JacobLex.js │ ├── JacobLexInterpreter.js │ ├── JacobLexerLexer.js │ ├── gramgram.js │ ├── gramlex.jacoblex │ ├── lexgram.js │ └── lexlex.js ├── regex.js └── stringreader.js ├── package.json ├── readme.md └── test ├── index.html ├── lexer_compiled_tests.html ├── lexer_compiled_tests.js ├── lexer_dynamic_tests.html ├── lexer_dynamic_tests.js ├── parser_compiled_tests.html ├── parser_compiled_tests.js ├── parser_dynamic_tests.html ├── parser_dynamic_tests.js ├── regex_parsing_tests.html ├── regex_parsing_tests.js └── testexamples.js /.gitignore: -------------------------------------------------------------------------------- 1 | coverage.html 2 | lib-cov 3 | .DS_Store 4 | node_modules 5 | personal_docs 6 | *.sock 7 | testing 8 | _mocha.js 9 | my-reporter.js 10 | *.sw* 11 | lib/browser/diff.js 12 | .idea 13 | *.iml 14 | examples/Expression/expast.js 15 | examples/Expression/expeval.js 16 | examples/Expression/expint.js 17 | examples/basiclexer.js -------------------------------------------------------------------------------- /cmd/cmd.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | /** 3 | * Created by gcannata on 20/08/2014. 
4 | */ 5 | 6 | 7 | var argv = require('minimist')(process.argv.slice(2)); 8 | var jacob = require('../index'); 9 | 10 | console.log('JACOB 1.0.5'); 11 | 12 | if(!argv.t && !argv.g){ 13 | printUsage(); 14 | } 15 | var tokenfile = argv.t; 16 | var grammarfile = argv.g; 17 | var fs = require('fs'); 18 | 19 | 20 | 21 | 22 | 23 | if(typeof tokenfile !== 'undefined') { 24 | //Generate Lexer 25 | jacob.elaborateLexFile(tokenfile, argv.l); 26 | } 27 | 28 | 29 | if(typeof grammarfile !== 'undefined') { 30 | //Generate Parser 31 | jacob.elaborateGramFile(grammarfile, argv.p); 32 | } 33 | 34 | function printUsage(){ 35 | console.log('Usage: jacob -t -g [-l lexerfile] [-p parserfile]') 36 | } -------------------------------------------------------------------------------- /examples/Expression/ExpAstClasses.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by gcannata on 22/08/2014. 3 | */ 4 | 5 | var ast = { 6 | 7 | 8 | 9 | 10 | Program: function Program(stmts) { 11 | this.statements = stmts; 12 | }, 13 | 14 | Assignment: function Assignment(id, exp) { 15 | this.id = id; 16 | this.exp = exp; 17 | }, 18 | 19 | Print: function Print( exp) { 20 | this.exp = exp; 21 | }, 22 | 23 | AddExp: function AddExp(e1, e2) { 24 | this.e1 = e1; 25 | this.e2 = e2; 26 | }, 27 | SubtractExp: function SubtractExp(e1, e2) { 28 | this.e1 = e1; 29 | this.e2 = e2; 30 | }, 31 | MultiplyExp: function MultiplyExp(e1, e2) { 32 | this.e1 = e1; 33 | this.e2 = e2; 34 | }, 35 | DivideExp: function DivideExp(e1, e2) { 36 | this.e1 = e1; 37 | this.e2 = e2; 38 | }, 39 | Integer: function Integer(i) { 40 | this.value = i; 41 | }, 42 | Identifier: function Identifier(id) { 43 | this.name = id; 44 | } 45 | 46 | }; 47 | if (typeof(module) !== 'undefined') { 48 | module.exports = ast; 49 | } 50 | -------------------------------------------------------------------------------- /examples/Expression/expeval.js: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Created by gcannata on 22/08/2014. 3 | */ 4 | 5 | var astclasses = astclasses || require('./ExpAstClasses'); 6 | 7 | astclasses.Program.prototype.eval = function(cxt){ 8 | cxt = cxt || {}; 9 | for(var i=0;i{digits}*\.{digits}+ { 7 | this.jjval = parseFloat(this.jjtext); 8 | return 'float'; 9 | } 10 | 11 | <>{digits}+ { 12 | this.jjval = parseInt(this.jjtext); 13 | return 'integer'; 14 | } 15 | 16 | print { 17 | return 'print'; 18 | } 19 | 20 | <>\w+ { return 'id'; } 21 | 22 | <>\s* { } 23 | 24 | <>\+ { return 'PLUS'; } 25 | <>. { return this.jjtext; } 26 | 27 | <>$ { console.log('EOF'); return 'EOF'; } -------------------------------------------------------------------------------- /gruntfile.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by gcannata on 30/08/2014. 3 | */ 4 | module.exports = function(grunt) { 5 | 6 | // Add the grunt-mocha-test tasks. 
7 | grunt.loadNpmTasks('grunt-mocha-test'); 8 | grunt.initConfig({ 9 | // Configure a mochaTest task 10 | mochaTest: { 11 | test: { 12 | options: { 13 | reporter: 'spec' 14 | }, 15 | src: ['test/**/*.js'] 16 | } 17 | } 18 | }); 19 | 20 | grunt.registerTask('default', ['jacoblex','jacobgram']); 21 | 22 | grunt.registerTask('jacoblex', 'Build JacobLex.js', function() { 23 | var jacob = require('./index'); 24 | jacob.elaborateLexFile('./lib/parser/lexlex.js','./lib/parser/jacoblexerlexer.js'); 25 | jacob.elaborateGramFile('./lib/parser/lexgram.js','./lib/parser/jacoblexinterpreter.js'); 26 | }); 27 | 28 | grunt.registerTask('jacobgram', 'Build JacobLex.js', function() { 29 | var jacob = require('./index'); 30 | jacob.elaborateLexFile('./lib/parser/gramlex.jacoblex','./lib/parser/jacobgramlexer.js'); 31 | jacob.elaborateGramFile('./lib/parser/gramgram.js','./lib/parser/JacobGramInterpreter.js'); 32 | }); 33 | 34 | 35 | 36 | grunt.registerTask('test', 'Run tests', function() { 37 | var jacob = require('./index'); 38 | jacob.elaborateLexFile('./lib/parser/gramlex.jacoblex','./lib/parser/jacobgramlexer.js'); 39 | jacob.elaborateGramFile('./lib/parser/gramgram.js','./lib/parser/JacobGramInterpreter.js'); 40 | }); 41 | 42 | grunt.registerTask('test', 'mochaTest'); 43 | }; -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by gcannata on 18/08/2014. 
3 | */ 4 | 5 | var lexer = require('./lib/lexer'); 6 | var parser = require('./lib/parser'); 7 | 8 | exports.lexer = lexer; 9 | exports.parser = parser; 10 | exports.StringReader = require('./lib/stringreader'); 11 | 12 | function generateLexerSource(jacoblex){ 13 | 14 | var tokenspecs; 15 | if(typeof jacoblex === 'string'){ 16 | tokenspecs = require('./lib/parser/JacobLex')(jacoblex); 17 | } else { 18 | tokenspecs = jacoblex; 19 | } 20 | 21 | return lexer.generateLexer(tokenspecs); 22 | } 23 | exports.generateLexerSource = generateLexerSource; 24 | 25 | function elaborateLexFile(tokenfile, outfile) { 26 | var fs = fs || require('fs'); 27 | var path = require('path'); 28 | var tokensrc = fs.readFileSync(tokenfile).toString(); 29 | var tokenspecs; 30 | if (tokenfile.indexOf('.js', tokenfile.length - 3) !== -1) { 31 | tokenspecs = eval(tokensrc); 32 | } else { 33 | tokenspecs = tokensrc; 34 | } 35 | 36 | var lexersrc = generateLexerSource(tokenspecs); 37 | var lexerout = outfile || path.join(path.dirname(tokenfile), (tokenspecs.moduleName || path.basename(tokenfile)+'.out') + '.js'); 38 | console.log('Generated file '+lexerout); 39 | fs.writeFileSync(lexerout, lexersrc); 40 | } 41 | exports.elaborateLexFile = elaborateLexFile; 42 | 43 | 44 | 45 | function generateParserSource(jacobgram){ 46 | var parserspecs; 47 | if(typeof jacobgram === 'string'){ 48 | parserspecs = require('./lib/parser/JacobGram')(jacobgram); 49 | } else { 50 | parserspecs = jacobgram; 51 | } 52 | 53 | return parser.generateParser(parserspecs); 54 | } 55 | 56 | exports.generateParserSource = generateParserSource; 57 | 58 | function elaborateGramFile(grammarfile, outfile) { 59 | var fs = fs || require('fs'); 60 | var path = require('path'); 61 | var grammarsrc = fs.readFileSync(grammarfile).toString(); 62 | var grammar; 63 | if (grammarfile.indexOf('.js', grammarfile.length - 3) !== -1) { 64 | grammar = eval(grammarsrc); 65 | } else { 66 | grammar = grammarsrc; 67 | } 68 | 69 | var parsersrc = 
generateParserSource(grammar); 70 | var parserout = outfile || path.join(path.dirname(grammarfile), ( grammar.moduleName || path.basename(grammarfile)+'.out') + '.js'); 71 | console.log('Generated file '+parserout); 72 | fs.writeFileSync(parserout, parsersrc); 73 | } 74 | 75 | exports.elaborateGramFile = elaborateGramFile; -------------------------------------------------------------------------------- /lib/lexer.js: -------------------------------------------------------------------------------- 1 | var junq = junq || require('junq'); 2 | var sets = sets || require('junq/sets'); 3 | var StringReader = StringReader || require('./stringreader.js'); 4 | var automata = automata || require('./automata.js'); 5 | var regex = regex || require('./regex'); 6 | 7 | var lexer; 8 | (function (lexer,dfa,regex, undefined) { 9 | "use strict"; 10 | 11 | var EOF = {}; 12 | 13 | var mergeNFAs = function (nfas) { 14 | var start = new dfa.State(); 15 | var rules = junq(nfas) 16 | .flatmap(function (nfa) { 17 | return nfa.getRules(); 18 | }) 19 | 20 | .append( 21 | junq(nfas) 22 | .map(function (nfa) { 23 | return nfa.startstate; 24 | }) 25 | .map(function (ss) { 26 | return new dfa.Rule(start, dfa.eps, ss); 27 | }) 28 | ) //we append an empty move from the new start to each of the NFA start state 29 | 30 | .toArray(); 31 | var acceptstates = junq(nfas) 32 | .flatmap(function (nfa) { 33 | return nfa.acceptstates; 34 | }); 35 | 36 | var nrb = new dfa.NDRuleBook(rules); 37 | var specs = {rulebook:nrb, acceptstates:acceptstates, startstate:start,alphabet:nrb.getSymbols()}; 38 | var compositeNFA = new dfa.NFA(specs); 39 | return compositeNFA; 40 | 41 | }; 42 | 43 | function processRules(specs) 44 | { 45 | var res = {}; 46 | res.rules=[]; 47 | res.actions=[]; 48 | res.states = {}; 49 | //compile tokens 50 | var tokenid=0; 51 | junq(specs.tokens).map(function(tokenspec){ 52 | var rule = {re: regex.parseRegExp(resolveDefinitions(specs, tokenspec.regexp)), state: tokenspec.state, action: 
tokenspec.action}; 53 | return rule; 54 | }) //here we have resolved definitions and parsed regexp 55 | .map(function(rule){ 56 | return expandLookAheads(rule, tokenid++); 57 | }) 58 | .flatmap(function(r){return r;}) 59 | .forEach(function(tokenspec){ 60 | res.rules.push(tokenspec); 61 | 62 | var actionid = res.actions.push(tokenspec.action)-1; 63 | var statesList = tokenspec.state ||['DEFAULT']; 64 | if(typeof statesList === 'string'){ 65 | if(statesList.length === 0){ 66 | statesList = 'DEFAULT'; 67 | } 68 | statesList = [statesList]; 69 | } 70 | junq(statesList).forEach(function(state){ 71 | res.states[state] = res.states[state] || {dfa:null}; 72 | if(tokenspec.re === dfa.EOF){ 73 | res.states[state].eofaction = actionid; 74 | } 75 | }); 76 | 77 | }) 78 | ; 79 | return res; 80 | } 81 | 82 | function expandLookAheads(rule, tokenid){ 83 | if(rule.re.isLookAhead()){ 84 | 85 | var minmax=rule.re.second.getMinMaxLength(); 86 | //nullable, we use just the head of the original RE 87 | if(minmax.min===0) { 88 | rule1 = {}; 89 | rule1.re = rule.re.first; 90 | rule1.action = rule.action; 91 | rule1.state = rule.state; 92 | return rule1; 93 | } 94 | //non nullable and not fixed length, we have to find the shortest tail 95 | if(minmax.max===Infinity){ 96 | var internalStateName = '_LA_'+tokenid; 97 | var rules = []; 98 | var rule1 = {}; 99 | rule1.re = new regex.Concat(rule.re.first,rule.re.second); 100 | rule1.action = new Function("this.pushState('"+internalStateName+"');\nthis.lawhole=this.jjtext;"); 101 | rule1.state = rule.state; 102 | rules.push(rule1); 103 | var rule2 = {state: internalStateName}; 104 | rule2.re = rule.re.second; 105 | rule2.action = new Function("this.restoreLookAhead();\nreturn ("+rule.action.toString()+').apply(this);'); 106 | rules.push(rule2); 107 | var rule3 = {state: internalStateName}; 108 | rule3.re = regex.parseRegExp('\\n|\\r|.'); 109 | rule3.action = new Function('this.less(2);\n'); 110 | 111 | rules.push(rule3); 112 | return rules; 113 
| } 114 | 115 | //fixed length, we use a simpler method 116 | if((minmax.min===minmax.max) && minmax.max=0)); 166 | }).toArray(); 167 | //var nfas = [], tokenspecs =specs.tokens; 168 | buildAutomataInternal(rules, rulesforstate, nfas); 169 | //TODO: check if nfas.length>1 170 | var composite = mergeNFAs(nfas); 171 | var dfaspecs = composite.toDFA(); 172 | dfaspecs.alphabet = dfaspecs.rulebook.getSymbols(); 173 | var compositeDFA = new dfa.DFA(dfaspecs); 174 | compositeDFA.minimize(); 175 | return compositeDFA; 176 | }; 177 | 178 | //lex.buildAutomata = buildAutomata; 179 | 180 | 181 | function generateLexer(specs){ 182 | var lexerName ='Lexer'; 183 | if(specs && specs.moduleName){ 184 | lexerName = specs.moduleName; 185 | } 186 | 187 | var str = []; 188 | str.push('var '+lexerName+' = (function (undefined) {'); 189 | 190 | var res = processRules(specs); 191 | 192 | 193 | str.push(new dfa.DFA().compileBase('CDFA_base')); 194 | 195 | for(var specialstate in res.states){ 196 | if(res.states.hasOwnProperty(specialstate)){ 197 | if(specialstate === 'undefined') specialstate='DEFAULT'; 198 | 199 | str.push(buildAutomata(res.rules,specialstate).compile( 200 | { className:'CDFA_'+specialstate, 201 | baseClass: 'CDFA_base' 202 | })); 203 | } 204 | } 205 | str.push('var EOF={};'); 206 | str.push('function Lexer(){\n'); 207 | str.push('if(!(this instanceof Lexer)) return new Lexer();\n'); 208 | str.push('this.pos={line:0,col:0};\n'); 209 | str.push('this.states={};'); 210 | str.push('this.state = [\'DEFAULT\'];'); 211 | str.push('this.lastChar = \'\\n\';'); 212 | 213 | str.push('this.actions = ['+res.actions+'];'); 214 | 215 | 216 | for(specialstate in res.states){ 217 | if(res.states.hasOwnProperty(specialstate)){ 218 | //if(specialstate === 'undefined') specialstate=undefined; 219 | str.push('this.states["'+specialstate+'"] = {};'); 220 | str.push('this.states["'+specialstate+'"].dfa = new '+ 'CDFA_'+specialstate+'();'); 221 | if( res.states[specialstate].eofaction){ 222 | 
str.push('this.states["'+specialstate+'"].eofaction = '+res.states[specialstate].eofaction+';'); 223 | } 224 | } 225 | 226 | } 227 | 228 | str.push('}'); 229 | 230 | 231 | str.push( 232 | junq(['setInput','nextToken','resetToken','halt','more','less','getDFA','getAction', 233 | 'pushState', 'popState','getState','restoreLookAhead','evictTail','isEOF']).map(function(mname){ 234 | return 'Lexer.prototype.'+mname+'=' + Lexer.prototype[mname].toString(); 235 | }).toArray().join(';\n') 236 | ); 237 | str.push(';'); 238 | 239 | str.push(StringReader.toString()); 240 | for(var mname in StringReader.prototype){ 241 | str.push('StringReader.prototype.'+mname+'=' + StringReader.prototype[mname].toString()+';' ); 242 | } 243 | str.push('if (typeof(module) !== \'undefined\') { module.exports = Lexer; }'); 244 | str.push('return Lexer;})();'); 245 | 246 | return str.join('\r\n'); 247 | } 248 | lexer.generateLexer = generateLexer; 249 | 250 | /*********** LEXER *************/ 251 | function Lexer(specs) { 252 | if(!(this instanceof Lexer)) return new Lexer(specs); 253 | this.input = undefined; 254 | this.actions = []; 255 | this.states = {}; 256 | this.state = [undefined]; 257 | this.lawhole=undefined; 258 | var res = processRules(specs); 259 | this.actions = res.actions; 260 | this.states = res.states; 261 | 262 | for(var specialstate in this.states){ 263 | if(this.states.hasOwnProperty(specialstate)){ 264 | if(specialstate === 'undefined') specialstate=undefined; 265 | this.states[specialstate].dfa = buildAutomata(res.rules,specialstate); 266 | } 267 | } 268 | 269 | } 270 | 271 | Lexer.prototype.setInput = function(input){ 272 | this.pos={row:0, col:0}; 273 | if(typeof input === 'string') 274 | {input = new StringReader(input);} 275 | this.input = input; 276 | this.state = ['DEFAULT']; 277 | this.lastChar='\n'; 278 | this.getDFA().reset(); 279 | return this; 280 | }; 281 | 282 | Lexer.prototype.pushState = function(state){ 283 | this.state.push(state); 284 | 
this.getDFA().reset(); 285 | }; 286 | 287 | Lexer.prototype.popState = function(){ 288 | if(this.state.length>1) { 289 | this.state.pop(); 290 | this.getDFA().reset(); 291 | } 292 | }; 293 | 294 | Lexer.prototype.restoreLookAhead = function(){ 295 | this.tailLength = this.jjtext.length; 296 | this.popState(); 297 | this.less(this.tailLength); 298 | this.jjtext = this.lawhole.substring(0,this.lawhole.length-this.tailLength); 299 | 300 | 301 | }; 302 | 303 | Lexer.prototype.evictTail = function(length){ 304 | this.less(length); 305 | this.jjtext = this.jjtext.substring(0,this.jjtext.length-length); 306 | }; 307 | 308 | 309 | Lexer.prototype.getState = function(){ 310 | return this.state[this.state.length-1]; 311 | }; 312 | 313 | Lexer.prototype.getDFA = function(){ 314 | return this.states[this.getState()].dfa; 315 | }; 316 | 317 | Lexer.prototype.getAction = function(i){ 318 | return this.actions[i]; 319 | }; 320 | 321 | Lexer.prototype.nextToken = function () { 322 | 323 | 324 | var ret = undefined; 325 | while(ret === undefined){ 326 | this.resetToken(); 327 | ret = this.more(); 328 | } 329 | 330 | 331 | if (ret === EOF) { 332 | this.current = EOF; 333 | } else { 334 | this.current = {}; 335 | this.current.name = ret; 336 | this.current.value = this.jjval; 337 | this.current.lexeme = this.jjtext; 338 | this.current.position = this.jjpos; 339 | this.current.pos = {col: this.jjcol, line: this.jjline}; 340 | } 341 | return this.current; 342 | }; 343 | 344 | 345 | Lexer.prototype.more = function(){ 346 | var ret; 347 | while (this.input.more()) { 348 | var c = this.input.peek(); 349 | this.getDFA().readSymbol(c); 350 | if (this.getDFA().isInDeadState()) { 351 | 352 | ret = this.halt(); 353 | return ret; 354 | 355 | } else { 356 | if (this.getDFA().isAccepting()) { 357 | this.lastValid = this.getDFA().getCurrentToken(); 358 | this.lastValidPos = this.input.getPos(); 359 | 360 | } 361 | this.buffer = this.buffer + c; 362 | this.lastChar = c; 363 | this.input.next(); 364 
| } 365 | 366 | } 367 | ret = this.halt(); 368 | return ret; 369 | }; 370 | 371 | Lexer.prototype.resetToken = function(){ 372 | this.getDFA().reset(); 373 | this.getDFA().bol = (this.lastChar === '\n'); 374 | this.lastValid = undefined; 375 | this.lastValidPos = -1; 376 | this.jjtext = ''; 377 | this.remains = ''; 378 | this.buffer = ''; 379 | this.startpos = this.input.getPos(); 380 | this.jjline = this.input.line; 381 | this.jjcol = this.input.col; 382 | }; 383 | 384 | Lexer.prototype.halt = function () { 385 | if (this.lastValidPos >= 0) { 386 | var lastValidLength = this.lastValidPos-this.startpos+1; 387 | this.jjtext = this.buffer.substring(0, lastValidLength); 388 | this.remains = this.buffer.substring(lastValidLength); 389 | this.jjval = this.jjtext; 390 | this.jjpos = this.lastValidPos + 1-this.jjtext.length; 391 | this.input.rollback(this.remains); 392 | var action = this.getAction(this.lastValid); 393 | if (typeof ( action) === 'function') { 394 | return action.call(this); 395 | } 396 | this.resetToken(); 397 | } 398 | else if(!this.input.more()){//EOF 399 | var actionid = this.states[this.getState()].eofaction; 400 | if(actionid){ 401 | action = this.getAction(actionid); 402 | if (typeof ( action) === 'function') { 403 | //Note we don't care of returned token, must return 'EOF' 404 | action.call(this); 405 | } 406 | } 407 | return EOF; 408 | } else {//Unexpected character 409 | throw new Error('Unexpected char \''+this.input.peek()+'\' at '+this.jjline +':'+this.jjcol); 410 | } 411 | }; 412 | 413 | 414 | Lexer.prototype.less = function(length){ 415 | this.input.rollback(length); 416 | }; 417 | 418 | Lexer.prototype.isEOF = function(o){ 419 | return o===EOF; 420 | }; 421 | 422 | lexer.EOF = EOF; 423 | lexer.Lexer = Lexer; 424 | 425 | })( lexer || (lexer={}),automata,regex); 426 | 427 | if (typeof(module) !== 'undefined') { module.exports = lexer; } -------------------------------------------------------------------------------- /lib/parser/JacobGram.js: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Created by gcannata on 27/08/2014. 3 | */ 4 | 5 | //node ./cmd/cmd.js -t ./lib/parser/gramlex.jacoblex -g ./lib/parser/gramgram.js -l ./lib/parser/jacobgramlexer.js -p ./lib/parser/JacobGramInterpreter.js 6 | 7 | 8 | 9 | function parseJacobGrammar(str){ 10 | var Lexer = require('./JacobGramLexer'); 11 | var Parser = require('./JacobGramInterpreter'); 12 | var junq = require('junq'); 13 | var _p = require('../parser'); 14 | var l = new Lexer().setInput(str); 15 | var p = new Parser({junq: junq, parser:_p}); 16 | var grammar = {}; 17 | var ret = p.parse(l,grammar); 18 | return grammar; 19 | } 20 | 21 | module.exports = parseJacobGrammar; -------------------------------------------------------------------------------- /lib/parser/JacobGramInterpreter.js: -------------------------------------------------------------------------------- 1 | var JacobGramInterpreter = (function (undefined) { 2 | function Parser(environment){ 3 | if(!(this instanceof Parser)) return new Parser(environment); 4 | var env = environment; 5 | 
this.action={"0":{"0":["reduce",[2,0,15]],"5":["reduce",[2,0,15]],"9":["reduce",[2,0,15]],"10":["reduce",[2,0,15]]},"1":{"0":["accept",[]]},"2":{"0":["reduce",[3,0,17]],"5":["shift",[7]],"9":["shift",[8]],"10":["reduce",[3,0,17]]},"3":{"0":["reduce",[1,2,0]],"10":["shift",[10]]},"4":{"0":["reduce",[2,2,16]],"5":["reduce",[2,2,16]],"9":["reduce",[2,2,16]],"10":["reduce",[2,2,16]]},"5":{"0":["reduce",[30,1,27]],"5":["reduce",[30,1,27]],"9":["reduce",[30,1,27]],"10":["reduce",[30,1,27]]},"6":{"0":["reduce",[30,1,28]],"5":["reduce",[30,1,28]],"9":["reduce",[30,1,28]],"10":["reduce",[30,1,28]]},"7":{"6":["shift",[11]]},"8":{"10":["shift",[12]]},"9":{"0":["reduce",[3,2,18]],"10":["reduce",[3,2,18]]},"10":{"12":["shift",[13]]},"11":{"0":["reduce",[7,0,19]],"5":["reduce",[7,0,19]],"6":["reduce",[7,0,19]],"9":["reduce",[7,0,19]],"10":["reduce",[7,0,19]]},"12":{"0":["reduce",[8,2,2]],"5":["reduce",[8,2,2]],"9":["reduce",[8,2,2]],"10":["reduce",[8,2,2]]},"13":{"6":["reduce",[28,0,23]],"10":["reduce",[28,0,23]],"14":["reduce",[28,0,23]],"18":["reduce",[28,0,23]],"22":["reduce",[28,0,23]],"23":["reduce",[28,0,23]],"24":["reduce",[28,0,23]],"25":["reduce",[28,0,23]],"26":["reduce",[28,0,23]],"27":["reduce",[28,0,23]],"31":["reduce",[28,0,23]]},"14":{"0":["reduce",[4,3,1]],"5":["reduce",[4,3,1]],"6":["shift",[18]],"9":["reduce",[4,3,1]],"10":["reduce",[4,3,1]]},"15":{"14":["shift",[19]]},"16":{"14":["reduce",[29,0,25]],"18":["reduce",[29,0,25]],"31":["shift",[22]]},"17":{"6":["shift",[25]],"10":["shift",[24]],"14":["reduce",[15,1,13]],"18":["reduce",[15,1,13]],"22":["shift",[26]],"23":["reduce",[15,1,13]],"24":["shift",[27]],"25":["reduce",[15,1,13]],"26":["shift",[28]],"27":["reduce",[15,1,13]],"31":["reduce",[15,1,13]]},"18":{"0":["reduce",[7,2,20]],"5":["reduce",[7,2,20]],"6":["reduce",[7,2,20]],"9":["reduce",[7,2,20]],"10":["reduce",[7,2,20]]},"19":{"0":["reduce",[11,4,3]],"10":["reduce",[11,4,3]]},"20":{"14":["reduce",[17,0,6]],"18":["reduce",[17,0,6]]},"21":{"14":["reduce",[
16,1,14]],"18":["reduce",[16,1,14]]},"22":{"14":["reduce",[29,1,26]],"18":["reduce",[29,1,26]]},"23":{"6":["reduce",[28,2,24]],"10":["reduce",[28,2,24]],"14":["reduce",[28,2,24]],"18":["reduce",[28,2,24]],"22":["reduce",[28,2,24]],"23":["reduce",[28,2,24]],"24":["reduce",[28,2,24]],"25":["reduce",[28,2,24]],"26":["reduce",[28,2,24]],"27":["reduce",[28,2,24]],"31":["reduce",[28,2,24]]},"24":{"6":["reduce",[21,1,8]],"10":["reduce",[21,1,8]],"14":["reduce",[21,1,8]],"18":["reduce",[21,1,8]],"22":["reduce",[21,1,8]],"23":["reduce",[21,1,8]],"24":["reduce",[21,1,8]],"25":["reduce",[21,1,8]],"26":["reduce",[21,1,8]],"27":["reduce",[21,1,8]],"31":["reduce",[21,1,8]]},"25":{"6":["reduce",[21,1,9]],"10":["reduce",[21,1,9]],"14":["reduce",[21,1,9]],"18":["reduce",[21,1,9]],"22":["reduce",[21,1,9]],"23":["reduce",[21,1,9]],"24":["reduce",[21,1,9]],"25":["reduce",[21,1,9]],"26":["reduce",[21,1,9]],"27":["reduce",[21,1,9]],"31":["reduce",[21,1,9]]},"26":{"6":["reduce",[28,0,23]],"10":["reduce",[28,0,23]],"14":["reduce",[28,0,23]],"18":["reduce",[28,0,23]],"22":["reduce",[28,0,23]],"23":["reduce",[28,0,23]],"24":["reduce",[28,0,23]],"25":["reduce",[28,0,23]],"26":["reduce",[28,0,23]],"27":["reduce",[28,0,23]],"31":["reduce",[28,0,23]]},"27":{"6":["reduce",[28,0,23]],"10":["reduce",[28,0,23]],"14":["reduce",[28,0,23]],"18":["reduce",[28,0,23]],"22":["reduce",[28,0,23]],"23":["reduce",[28,0,23]],"24":["reduce",[28,0,23]],"25":["reduce",[28,0,23]],"26":["reduce",[28,0,23]],"27":["reduce",[28,0,23]],"31":["reduce",[28,0,23]]},"28":{"6":["reduce",[28,0,23]],"10":["reduce",[28,0,23]],"14":["reduce",[28,0,23]],"18":["reduce",[28,0,23]],"22":["reduce",[28,0,23]],"23":["reduce",[28,0,23]],"24":["reduce",[28,0,23]],"25":["reduce",[28,0,23]],"26":["reduce",[28,0,23]],"27":["reduce",[28,0,23]],"31":["reduce",[28,0,23]]},"29":{"14":["reduce",[13,3,4]],"18":["shift",[34]]},"30":{"23":["shift",[35]]},"31":{"25":["shift",[36]]},"32":{"18":["reduce",[20,0,21]],"25":["reduce",[20,0,21]]},"33":{"27
":["shift",[38]]},"34":{"6":["reduce",[28,0,23]],"10":["reduce",[28,0,23]],"14":["reduce",[28,0,23]],"18":["reduce",[28,0,23]],"22":["reduce",[28,0,23]],"23":["reduce",[28,0,23]],"24":["reduce",[28,0,23]],"25":["reduce",[28,0,23]],"26":["reduce",[28,0,23]],"27":["reduce",[28,0,23]],"31":["reduce",[28,0,23]]},"35":{"6":["reduce",[21,3,10]],"10":["reduce",[21,3,10]],"14":["reduce",[21,3,10]],"18":["reduce",[21,3,10]],"22":["reduce",[21,3,10]],"23":["reduce",[21,3,10]],"24":["reduce",[21,3,10]],"25":["reduce",[21,3,10]],"26":["reduce",[21,3,10]],"27":["reduce",[21,3,10]],"31":["reduce",[21,3,10]]},"36":{"6":["reduce",[21,3,11]],"10":["reduce",[21,3,11]],"14":["reduce",[21,3,11]],"18":["reduce",[21,3,11]],"22":["reduce",[21,3,11]],"23":["reduce",[21,3,11]],"24":["reduce",[21,3,11]],"25":["reduce",[21,3,11]],"26":["reduce",[21,3,11]],"27":["reduce",[21,3,11]],"31":["reduce",[21,3,11]]},"37":{"18":["shift",[40]],"25":["reduce",[19,2,7]]},"38":{"6":["reduce",[21,3,12]],"10":["reduce",[21,3,12]],"14":["reduce",[21,3,12]],"18":["reduce",[21,3,12]],"22":["reduce",[21,3,12]],"23":["reduce",[21,3,12]],"24":["reduce",[21,3,12]],"25":["reduce",[21,3,12]],"26":["reduce",[21,3,12]],"27":["reduce",[21,3,12]],"31":["reduce",[21,3,12]]},"39":{"14":["reduce",[29,0,25]],"18":["reduce",[29,0,25]],"31":["shift",[22]]},"40":{"6":["reduce",[28,0,23]],"10":["reduce",[28,0,23]],"14":["reduce",[28,0,23]],"18":["reduce",[28,0,23]],"22":["reduce",[28,0,23]],"23":["reduce",[28,0,23]],"24":["reduce",[28,0,23]],"25":["reduce",[28,0,23]],"26":["reduce",[28,0,23]],"27":["reduce",[28,0,23]],"31":["reduce",[28,0,23]]},"41":{"14":["reduce",[17,4,5]],"18":["reduce",[17,4,5]]},"42":{"18":["reduce",[20,3,22]],"25":["reduce",[20,3,22]]}}; 6 | 
this.goto={"0":{"1":1,"2":2},"2":{"3":3,"4":5,"8":6,"30":4},"3":{"11":9},"11":{"7":14},"13":{"13":15,"15":16,"28":17},"16":{"16":20,"29":21},"17":{"21":23},"20":{"17":29},"26":{"15":30,"28":17},"27":{"15":32,"19":31,"28":17},"28":{"15":33,"28":17},"32":{"20":37},"34":{"15":39,"28":17},"39":{"16":41,"29":21},"40":{"15":42,"28":17}}; 7 | this.actions=[function (operators, prods){ 8 | this.productions = [].concat.apply([],prods); 9 | return prods; 10 | },function (assoc, symbol, symbols){ 11 | 12 | var symbols = [symbol].concat(symbols); 13 | this.operators = this.operators || []; 14 | var max = 0; 15 | if(this.operators.length>0){ 16 | max = this.operators[this.operators.length-1][2]; 17 | } 18 | max = max + 100; 19 | for(var i=0;i>":0,"Grammar":1,"Repeat_0_0":2,"Repeat_0_2":3,"OperatorDecl":4,"Op":5,"Terminal":6,"Repeat_1_4":7,"DirectiveDecl":8,"Directive":9,"id":10,"Rule":11,"=":12,"AlternativesWithActions":13,";":14,"RHS":15,"Action":16,"RHSRepeat":17,"|":18,"Alternatives":19,"Repeat_7_6":20,"RHSAtom":21,"[":22,"]":23,"(":24,")":25,"{":26,"}":27,"Repeat_13_8":28,"Optional_14_10":29,"Group16_0":30,"function":31}; 93 | this.actionMode='function'; 94 | } 95 | Parser.prototype.identity=function (x) { 96 | "use strict"; 97 | return x; 98 | }; 99 | Parser.prototype.parse=function (lexer, context) { 100 | this.stack = []; 101 | this.context = context || {}; 102 | 103 | this.lexer = lexer; 104 | this.a = this.lexer.nextToken(); 105 | this.stack.push({s: this.startstate, i: 0}); 106 | this.accepted = false; 107 | this.inerror = false; 108 | while (!this.accepted && !this.inerror) { 109 | var top = this.stack[this.stack.length - 1]; 110 | var s = top.s; 111 | //this.a = this.currentToken; 112 | if(lexer.isEOF(this.a)) 113 | this.an = 0; 114 | else 115 | this.an = this.symbolsTable[this.a.name]; 116 | var action = this.action[s][this.an]; 117 | if (action !== undefined) { 118 | this[action[0]].apply(this, action[1]); 119 | } else { 120 | this.inerror = true; 121 | 
this.error(this.a,this); 122 | } 123 | } 124 | return top.i.value; 125 | }; 126 | Parser.prototype.shift=function (state) { 127 | "use strict"; 128 | this.stack.push({s: state, i: this.a}); 129 | this.a = this.lexer.nextToken(); 130 | 131 | }; 132 | Parser.prototype.reduce=function (head, length, prodindex) { 133 | "use strict"; 134 | //var prod = this.productions[prodnumber]; 135 | var self = this; 136 | var rhs = this.stack.splice(-length, length); 137 | var t = this.stack[this.stack.length - 1]; 138 | var ns = this.goto[t.s][head]; 139 | var value; 140 | if (this.actions) { 141 | var action = this.actions[prodindex] || this.identity; 142 | var values = rhs.map(function (si) { 143 | return si.i.value; 144 | }); 145 | 146 | if(self.actionMode==='constructor') 147 | value = this.create(action,values); 148 | else 149 | value = action.apply(this.context, values); 150 | } 151 | //If we are debugging 152 | 153 | if(this.symbols) { 154 | var nt = {name: this.symbols[head].name, value:value}; 155 | this.stack.push({s: ns, i: nt}); 156 | } 157 | else 158 | { 159 | this.stack.push({s: ns,i:{value: value}}); 160 | } 161 | 162 | }; 163 | Parser.prototype.accept=function () { 164 | "use strict"; 165 | this.accepted = true; 166 | }; 167 | Parser.prototype.error=function (token){ 168 | if(this.lexer.isEOF(token)){ 169 | throw Error("Unexpected EOF at "+this.lexer.jjline+':'+this.lexer.jjcol); 170 | } else 171 | throw Error('Unexpected token '+token.name+' "'+token.lexeme+'" at ('+token.pos.line+':'+token.pos.col+')'); 172 | }; 173 | Parser.prototype.create=function (ctor,args){ 174 | var args = [this.context].concat(args); 175 | var factory = ctor.bind.apply(ctor,args); 176 | return new factory(); 177 | }; 178 | if (typeof(module) !== 'undefined') { module.exports = Parser; } 179 | return Parser; 180 | })(); -------------------------------------------------------------------------------- /lib/parser/JacobGramLexer.js: 
-------------------------------------------------------------------------------- 1 | var JacobGramLexer = (function (undefined) { 2 | function CDFA_base(){ 3 | this.ss=undefined; 4 | this.as=undefined; 5 | this.tt=undefined; 6 | this.stt={}; 7 | } 8 | CDFA_base.prototype.reset = function (state) { 9 | this.cs = state || this.ss; 10 | this.bol=false; 11 | }; 12 | CDFA_base.prototype.readSymbol = function (c) { 13 | this.cs = this.nextState(this.cs, c); 14 | }; 15 | CDFA_base.prototype.isAccepting = function () { 16 | var acc = this.as.indexOf(this.cs)>=0; 17 | if((this.stt[this.cs]===-1)&&!this.bol){ 18 | acc=false;} 19 | return acc;}; 20 | CDFA_base.prototype.isInDeadState = function () { 21 | return this.cs === undefined || this.cs === 0; 22 | }; 23 | CDFA_base.prototype.getCurrentToken = function(){ 24 | var t= this.tt[this.cs]; 25 | var s=this.stt[this.cs]; 26 | if(s!==undefined){return this.bol?t:s;} 27 | return t;}; 28 | 29 | function CDFA_DEFAULT(){ 30 | this.ss=1; 31 | this.as=[2,3,4,5,6,7,8,9,10,11,12,13,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39]; 32 | this.tt=[null,null,15,14,14,15,15,15,13,13,7,7,7,7,null,0,3,13,13,7,7,7,8,13,7,7,7,13,6,7,7,13,7,6,13,7,13,7,9,6]; 33 | this.stt={}; 34 | } 35 | CDFA_DEFAULT.prototype= new CDFA_base(); 36 | CDFA_DEFAULT.prototype.nextState = function(state, c){ 37 | var next = 0; 38 | switch(state){ 39 | case 1: 40 | if((c < "\t" || "\n" < c) && (c < "\r" || "\r" < c) && (c < " " || " " < c) && (c < "%" || "%" < c) && (c < "'" || "'" < c) && (c < "/" || "9" < c) && (c < "A" || "Z" < c) && (c < "_" || "_" < c) && (c < "a" || "z" < c) ){ 41 | next = 2; 42 | } else if(("\t" === c ) || (" " === c )){ 43 | next = 3; 44 | } else if(("\n" === c ) || ("\r" === c )){ 45 | next = 3; 46 | } else if(("%" === c )){ 47 | next = 5; 48 | } else if(("'" === c )){ 49 | next = 6; 50 | } else if(("/" === c )){ 51 | next = 7; 52 | } else if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" 
<= c && c <= "e") || ("g" <= c && c <= "z") ){ 53 | next = 8; 54 | } else if(("f" === c )){ 55 | next = 9; 56 | } 57 | break; 58 | case 5: 59 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "k") || ("m" === c ) || ("o" <= c && c <= "q") || ("s" <= c && c <= "z") ){ 60 | next = 10; 61 | } else if(("l" === c )){ 62 | next = 11; 63 | } else if(("n" === c )){ 64 | next = 12; 65 | } else if(("r" === c )){ 66 | next = 13; 67 | } 68 | break; 69 | case 6: 70 | if((c < "'" || "'" < c) && (c < "|" || "|" < c) ){ 71 | next = 14; 72 | } 73 | break; 74 | case 7: 75 | if(("*" === c )){ 76 | next = 15; 77 | } else if(("/" === c )){ 78 | next = 16; 79 | } 80 | break; 81 | case 8: 82 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "z") ){ 83 | next = 8; 84 | } 85 | break; 86 | case 9: 87 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "t") || ("v" <= c && c <= "z") ){ 88 | next = 8; 89 | } else if(("u" === c )){ 90 | next = 18; 91 | } 92 | break; 93 | case 10: 94 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "z") ){ 95 | next = 10; 96 | } 97 | break; 98 | case 11: 99 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "d") || ("f" <= c && c <= "z") ){ 100 | next = 10; 101 | } else if(("e" === c )){ 102 | next = 19; 103 | } 104 | break; 105 | case 12: 106 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "n") || ("p" <= c && c <= "z") ){ 107 | next = 10; 108 | } else if(("o" === c )){ 109 | next = 20; 110 | } 111 | break; 112 | case 13: 113 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "h") || ("j" <= c && c <= "z") ){ 114 | next = 10; 115 | } else if(("i" === c )){ 116 | next = 21; 117 | } 118 | break; 119 | case 14: 120 | if((c < "'" || "'" < c) && (c < "|" || "|" < c) ){ 121 | next = 
14; 122 | } else if(("'" === c )){ 123 | next = 22; 124 | } 125 | break; 126 | case 18: 127 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "m") || ("o" <= c && c <= "z") ){ 128 | next = 8; 129 | } else if(("n" === c )){ 130 | next = 23; 131 | } 132 | break; 133 | case 19: 134 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "e") || ("g" <= c && c <= "z") ){ 135 | next = 10; 136 | } else if(("f" === c )){ 137 | next = 24; 138 | } 139 | break; 140 | case 20: 141 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "m") || ("o" <= c && c <= "z") ){ 142 | next = 10; 143 | } else if(("n" === c )){ 144 | next = 25; 145 | } 146 | break; 147 | case 21: 148 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "f") || ("h" <= c && c <= "z") ){ 149 | next = 10; 150 | } else if(("g" === c )){ 151 | next = 26; 152 | } 153 | break; 154 | case 23: 155 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "b") || ("d" <= c && c <= "z") ){ 156 | next = 8; 157 | } else if(("c" === c )){ 158 | next = 27; 159 | } 160 | break; 161 | case 24: 162 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "s") || ("u" <= c && c <= "z") ){ 163 | next = 10; 164 | } else if(("t" === c )){ 165 | next = 28; 166 | } 167 | break; 168 | case 25: 169 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("b" <= c && c <= "z") ){ 170 | next = 10; 171 | } else if(("a" === c )){ 172 | next = 29; 173 | } 174 | break; 175 | case 26: 176 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "g") || ("i" <= c && c <= "z") ){ 177 | next = 10; 178 | } else if(("h" === c )){ 179 | next = 24; 180 | } 181 | break; 182 | case 27: 183 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "s") 
|| ("u" <= c && c <= "z") ){ 184 | next = 8; 185 | } else if(("t" === c )){ 186 | next = 31; 187 | } 188 | break; 189 | case 28: 190 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "z") ){ 191 | next = 10; 192 | } 193 | break; 194 | case 29: 195 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "r") || ("t" <= c && c <= "z") ){ 196 | next = 10; 197 | } else if(("s" === c )){ 198 | next = 32; 199 | } 200 | break; 201 | case 31: 202 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "h") || ("j" <= c && c <= "z") ){ 203 | next = 8; 204 | } else if(("i" === c )){ 205 | next = 34; 206 | } 207 | break; 208 | case 32: 209 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "r") || ("t" <= c && c <= "z") ){ 210 | next = 10; 211 | } else if(("s" === c )){ 212 | next = 35; 213 | } 214 | break; 215 | case 34: 216 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "n") || ("p" <= c && c <= "z") ){ 217 | next = 8; 218 | } else if(("o" === c )){ 219 | next = 36; 220 | } 221 | break; 222 | case 35: 223 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "n") || ("p" <= c && c <= "z") ){ 224 | next = 10; 225 | } else if(("o" === c )){ 226 | next = 37; 227 | } 228 | break; 229 | case 36: 230 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "m") || ("o" <= c && c <= "z") ){ 231 | next = 8; 232 | } else if(("n" === c )){ 233 | next = 38; 234 | } 235 | break; 236 | case 37: 237 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "b") || ("d" <= c && c <= "z") ){ 238 | next = 10; 239 | } else if(("c" === c )){ 240 | next = 28; 241 | } 242 | break; 243 | case 38: 244 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "z") ){ 245 
| next = 8; 246 | } 247 | break; 248 | } 249 | return next; 250 | }; 251 | 252 | function CDFA_BLOCKCOMMENT(){ 253 | this.ss=1; 254 | this.as=[2,3,4,5,6]; 255 | this.tt=[null,null,2,2,2,2,1]; 256 | this.stt={}; 257 | } 258 | CDFA_BLOCKCOMMENT.prototype= new CDFA_base(); 259 | CDFA_BLOCKCOMMENT.prototype.nextState = function(state, c){ 260 | var next = 0; 261 | switch(state){ 262 | case 1: 263 | if((c < "\n" || "\n" < c) && (c < "\r" || "\r" < c) && (c < "*" || "*" < c) ){ 264 | next = 2; 265 | } else if(("\n" === c )){ 266 | next = 2; 267 | } else if(("\r" === c )){ 268 | next = 2; 269 | } else if(("*" === c )){ 270 | next = 5; 271 | } 272 | break; 273 | case 5: 274 | if(("/" === c )){ 275 | next = 6; 276 | } 277 | break; 278 | } 279 | return next; 280 | }; 281 | 282 | function CDFA_LINECOMMENT(){ 283 | this.ss=1; 284 | this.as=[1,2,3]; 285 | this.tt=[null,4,4,5]; 286 | this.stt={}; 287 | } 288 | CDFA_LINECOMMENT.prototype= new CDFA_base(); 289 | CDFA_LINECOMMENT.prototype.nextState = function(state, c){ 290 | var next = 0; 291 | switch(state){ 292 | case 1: 293 | if((c < "\n" || "\n" < c) ){ 294 | next = 2; 295 | } else if(("\n" === c )){ 296 | next = 3; 297 | } 298 | break; 299 | case 2: 300 | if((c < "\n" || "\n" < c) ){ 301 | next = 2; 302 | } 303 | break; 304 | } 305 | return next; 306 | }; 307 | 308 | function CDFA_FUNCTION(){ 309 | this.ss=1; 310 | this.as=[1,2,3,4]; 311 | this.tt=[null,10,10,11,12]; 312 | this.stt={}; 313 | } 314 | CDFA_FUNCTION.prototype= new CDFA_base(); 315 | CDFA_FUNCTION.prototype.nextState = function(state, c){ 316 | var next = 0; 317 | switch(state){ 318 | case 1: 319 | if((c < "{" || "{" < c) && (c < "}" || "}" < c) ){ 320 | next = 2; 321 | } else if(("{" === c )){ 322 | next = 3; 323 | } else if(("}" === c )){ 324 | next = 4; 325 | } 326 | break; 327 | case 2: 328 | if((c < "{" || "{" < c) && (c < "}" || "}" < c) ){ 329 | next = 2; 330 | } 331 | break; 332 | } 333 | return next; 334 | }; 335 | 336 | var EOF={}; 337 | function 
Lexer(){ 338 | 339 | if(!(this instanceof Lexer)) return new Lexer(); 340 | 341 | this.pos={line:0,col:0}; 342 | 343 | this.states={}; 344 | this.state = ['DEFAULT']; 345 | this.lastChar = '\n'; 346 | this.actions = [function anonymous() { 347 | this.pushState('BLOCKCOMMENT'); 348 | },function anonymous() { 349 | this.popState(); 350 | },,function anonymous() { 351 | this.pushState('LINECOMMENT'); 352 | },,function anonymous() { 353 | this.popState(); 354 | },function anonymous() { 355 | this.jjval = this.jjtext.substring(1); return 'Op'; 356 | },function anonymous() { 357 | this.jjval = this.jjtext.substring(1); return 'Directive'; 358 | },function anonymous() { 359 | this.jjval = this.jjtext.substring(1,this.jjtext.length-1); return 'Terminal'; 360 | },function anonymous() { 361 | this.func=this.jjtext;this.blocklevel=0; this.pushState('FUNCTION'); 362 | },function anonymous() { 363 | this.func+=this.jjtext; 364 | },function anonymous() { 365 | this.func+=this.jjtext; this.blocklevel++ 366 | },function anonymous() { 367 | this.func+=this.jjtext; this.blocklevel--; if(this.blocklevel===0) {this.popState(); this.jjtext = this.jjval = this.func; return 'function'; } 368 | },function anonymous() { 369 | return 'id'; 370 | },function anonymous() { 371 | //mah 372 | },function anonymous() { 373 | return this.jjtext; 374 | }]; 375 | this.states["DEFAULT"] = {}; 376 | this.states["DEFAULT"].dfa = new CDFA_DEFAULT(); 377 | this.states["BLOCKCOMMENT"] = {}; 378 | this.states["BLOCKCOMMENT"].dfa = new CDFA_BLOCKCOMMENT(); 379 | this.states["LINECOMMENT"] = {}; 380 | this.states["LINECOMMENT"].dfa = new CDFA_LINECOMMENT(); 381 | this.states["FUNCTION"] = {}; 382 | this.states["FUNCTION"].dfa = new CDFA_FUNCTION(); 383 | } 384 | Lexer.prototype.setInput=function (input){ 385 | this.pos={row:0, col:0}; 386 | if(typeof input === 'string') 387 | {input = new StringReader(input);} 388 | this.input = input; 389 | this.state = ['DEFAULT']; 390 | this.lastChar='\n'; 391 | 
this.getDFA().reset(); 392 | return this; 393 | }; 394 | Lexer.prototype.nextToken=function () { 395 | 396 | 397 | var ret = undefined; 398 | while(ret === undefined){ 399 | this.resetToken(); 400 | ret = this.more(); 401 | } 402 | 403 | 404 | if (ret === EOF) { 405 | this.current = EOF; 406 | } else { 407 | this.current = {}; 408 | this.current.name = ret; 409 | this.current.value = this.jjval; 410 | this.current.lexeme = this.jjtext; 411 | this.current.position = this.jjpos; 412 | this.current.pos = {col: this.jjcol, line: this.jjline}; 413 | } 414 | return this.current; 415 | }; 416 | Lexer.prototype.resetToken=function (){ 417 | this.getDFA().reset(); 418 | this.getDFA().bol = (this.lastChar === '\n'); 419 | this.lastValid = undefined; 420 | this.lastValidPos = -1; 421 | this.jjtext = ''; 422 | this.remains = ''; 423 | this.buffer = ''; 424 | this.jjline = this.input.line; 425 | this.jjcol = this.input.col; 426 | }; 427 | Lexer.prototype.halt=function () { 428 | if (this.lastValidPos >= 0) { 429 | this.jjtext = this.buffer.substring(0, this.lastValidPos + 1); 430 | this.remains = this.buffer.substring(this.lastValidPos + 1); 431 | this.jjval = this.jjtext; 432 | this.jjpos = this.lastValidPos + 1-this.jjtext.length; 433 | this.input.rollback(this.remains); 434 | var action = this.getAction(this.lastValid); 435 | if (typeof ( action) === 'function') { 436 | return action.call(this); 437 | } 438 | this.resetToken(); 439 | } 440 | else if(!this.input.more()){//EOF 441 | var actionid = this.states[this.getState()].eofaction; 442 | if(actionid){ 443 | action = this.getAction(actionid); 444 | if (typeof ( action) === 'function') { 445 | //Note we don't care of returned token, must return 'EOF' 446 | action.call(this); 447 | } 448 | } 449 | return EOF; 450 | } else {//Unexpected character 451 | throw new Error('Unexpected char \''+this.input.peek()+'\' at '+this.jjline +':'+this.jjcol); 452 | } 453 | }; 454 | Lexer.prototype.more=function (){ 455 | var ret; 456 | 
while (this.input.more()) { 457 | var c = this.input.peek(); 458 | this.getDFA().readSymbol(c); 459 | if (this.getDFA().isInDeadState()) { 460 | 461 | ret = this.halt(); 462 | return ret; 463 | 464 | } else { 465 | if (this.getDFA().isAccepting()) { 466 | this.lastValid = this.getDFA().getCurrentToken(); 467 | this.lastValidPos = this.input.getPos(); 468 | 469 | } 470 | this.buffer = this.buffer + c; 471 | this.lastChar = c; 472 | this.input.next(); 473 | } 474 | 475 | } 476 | ret = this.halt(); 477 | return ret; 478 | }; 479 | Lexer.prototype.less=function (length){ 480 | this.input.rollback(length); 481 | }; 482 | Lexer.prototype.getDFA=function (){ 483 | return this.states[this.getState()].dfa; 484 | }; 485 | Lexer.prototype.getAction=function (i){ 486 | return this.actions[i]; 487 | }; 488 | Lexer.prototype.pushState=function (state){ 489 | this.state.push(state); 490 | this.getDFA().reset(); 491 | }; 492 | Lexer.prototype.popState=function (){ 493 | if(this.state.length>1) { 494 | this.state.pop(); 495 | this.getDFA().reset(); 496 | } 497 | }; 498 | Lexer.prototype.getState=function (){ 499 | return this.state[this.state.length-1]; 500 | }; 501 | Lexer.prototype.restoreLookAhead=function (){ 502 | this.tailLength = this.jjtext.length; 503 | this.popState(); 504 | this.less(this.tailLength); 505 | this.jjtext = this.lawhole.substring(0,this.lawhole.length-this.tailLength); 506 | 507 | 508 | }; 509 | Lexer.prototype.evictTail=function (length){ 510 | this.less(length); 511 | this.jjtext = this.jjtext.substring(0,this.jjtext.length-length); 512 | }; 513 | Lexer.prototype.isEOF=function (o){ 514 | return o===EOF; 515 | } 516 | ; 517 | function StringReader(str){ 518 | if(!(this instanceof StringReader)) return new StringReader(str); 519 | this.str = str; 520 | this.pos = 0; 521 | this.line = 0; 522 | this.col = 0; 523 | } 524 | StringReader.prototype.getPos=function (){ 525 | return this.pos; 526 | }; 527 | StringReader.prototype.peek=function () 528 | { 529 | 
//TODO: handle EOF 530 | return this.str.charAt(this.pos); 531 | }; 532 | StringReader.prototype.eat=function (str) 533 | { 534 | var istr = this.str.substring(this.pos,this.pos+str.length); 535 | if(istr===str){ 536 | this.pos+=str.length; 537 | this.updatePos(str,1); 538 | } else { 539 | throw new Error('Expected "'+str+'", got "'+istr+'"!'); 540 | } 541 | }; 542 | StringReader.prototype.updatePos=function (str,delta){ 543 | for(var i=0;i0){ 24 | try { 25 | rule.action = new Function(action) 26 | }catch(e){ 27 | throw Error(e.toString() + ' in rule ' + this.tokens.length+1); 28 | } 29 | } 30 | this.tokens.push(rule); 31 | },function (_,list){ 32 | //StatesList 33 | return env.junq(list).flatmap().odd().toArray(); 34 | },function (){return [];},function (){ 35 | return arguments[0].concat(Array.prototype.slice.call(arguments,1)); 36 | },function (){return [];},function (){ 37 | return arguments[0].concat(Array.prototype.slice.call(arguments,1)); 38 | },function (){return [];},function (){ 39 | return arguments[0].concat(Array.prototype.slice.call(arguments,1)); 40 | },function () { 41 | return arguments[0]; 42 | },function () { 43 | return arguments[0]; 44 | },function () { 45 | return undefined; 46 | },function () { 47 | return arguments[0]; 48 | },function () { 49 | return []; 50 | },function () { 51 | return [].slice.apply(arguments); 52 | },function (){return [];},function (){ 53 | return arguments[0].concat(Array.prototype.slice.call(arguments,1)); 54 | }]; 55 | this.startstate=0; 56 | this.symbolsTable={"<>":0,"LexPec":1,"Repeat_0_0":2,"SEPARATOR":3,"Repeat_0_2":4,"Repeat_0_4":5,"Directive":6,"directive":7,"id":8,"Definition":9,"=":10,"regex":11,"TokenRule":12,"StatesList":13,"Group3_6":14,"Optional_3_8":15,"<":16,"Optional_4_10":17,">":18,"actionblock":19,"Repeat_16_0":20,",":21}; 57 | this.actionMode='function'; 58 | } 59 | Parser.prototype.identity=function (x) { 60 | "use strict"; 61 | return x; 62 | }; 63 | Parser.prototype.parse=function (lexer, 
context) { 64 | this.stack = []; 65 | this.context = context || {}; 66 | 67 | this.lexer = lexer; 68 | this.a = this.lexer.nextToken(); 69 | this.stack.push({s: this.startstate, i: 0}); 70 | this.accepted = false; 71 | this.inerror = false; 72 | while (!this.accepted && !this.inerror) { 73 | var top = this.stack[this.stack.length - 1]; 74 | var s = top.s; 75 | //this.a = this.currentToken; 76 | if(lexer.isEOF(this.a)) 77 | this.an = 0; 78 | else 79 | this.an = this.symbolsTable[this.a.name]; 80 | var action = this.action[s][this.an]; 81 | if (action !== undefined) { 82 | this[action[0]].apply(this, action[1]); 83 | } else { 84 | this.inerror = true; 85 | this.error(this.a,this); 86 | } 87 | } 88 | return top.i.value; 89 | }; 90 | Parser.prototype.shift=function (state) { 91 | "use strict"; 92 | this.stack.push({s: state, i: this.a}); 93 | this.a = this.lexer.nextToken(); 94 | 95 | }; 96 | Parser.prototype.reduce=function (head, length, prodindex) { 97 | "use strict"; 98 | //var prod = this.productions[prodnumber]; 99 | var self = this; 100 | var rhs = this.stack.splice(-length, length); 101 | var t = this.stack[this.stack.length - 1]; 102 | var ns = this.goto[t.s][head]; 103 | var value; 104 | if (this.actions) { 105 | var action = this.actions[prodindex] || this.identity; 106 | var values = rhs.map(function (si) { 107 | return si.i.value; 108 | }); 109 | 110 | if(self.actionMode==='constructor') 111 | value = this.create(action,values); 112 | else 113 | value = action.apply(this.context, values); 114 | } 115 | //If we are debugging 116 | 117 | if(this.symbols) { 118 | var nt = {name: this.symbols[head].name, value:value}; 119 | this.stack.push({s: ns, i: nt}); 120 | } 121 | else 122 | { 123 | this.stack.push({s: ns,i:{value: value}}); 124 | } 125 | 126 | }; 127 | Parser.prototype.accept=function () { 128 | "use strict"; 129 | this.accepted = true; 130 | }; 131 | Parser.prototype.error=function (token){ 132 | if(this.lexer.isEOF(token)){ 133 | throw 
Error("Unexpected EOF at "+this.lexer.jjline+':'+this.lexer.jjcol); 134 | } else 135 | throw Error('Unexpected token '+token.name+' "'+token.lexeme+'" at ('+token.pos.line+':'+token.pos.col+')'); 136 | }; 137 | Parser.prototype.create=function (ctor,args){ 138 | var args = [this.context].concat(args); 139 | var factory = ctor.bind.apply(ctor,args); 140 | return new factory(); 141 | }; 142 | if (typeof(module) !== 'undefined') { module.exports = Parser; } 143 | return Parser; 144 | })(); -------------------------------------------------------------------------------- /lib/parser/JacobLexerLexer.js: -------------------------------------------------------------------------------- 1 | var JacobLexerLexer = (function (undefined) { 2 | function CDFA_base(){ 3 | this.ss=undefined; 4 | this.as=undefined; 5 | this.tt=undefined; 6 | this.stt={}; 7 | } 8 | CDFA_base.prototype.reset = function (state) { 9 | this.cs = state || this.ss; 10 | this.bol=false; 11 | }; 12 | CDFA_base.prototype.readSymbol = function (c) { 13 | this.cs = this.nextState(this.cs, c); 14 | }; 15 | CDFA_base.prototype.isAccepting = function () { 16 | var acc = this.as.indexOf(this.cs)>=0; 17 | if((this.stt[this.cs]===-1)&&!this.bol){ 18 | acc=false;} 19 | return acc;}; 20 | CDFA_base.prototype.isInDeadState = function () { 21 | return this.cs === undefined || this.cs === 0; 22 | }; 23 | CDFA_base.prototype.getCurrentToken = function(){ 24 | var t= this.tt[this.cs]; 25 | var s=this.stt[this.cs]; 26 | if(s!==undefined){return this.bol?t:s;} 27 | return t;}; 28 | 29 | function CDFA_DEFAULT(){ 30 | this.ss=1; 31 | this.as=[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]; 32 | this.tt=[null,16,17,16,16,17,17,13,14,12,3,10,11,0,7,13]; 33 | this.stt={}; 34 | } 35 | CDFA_DEFAULT.prototype= new CDFA_base(); 36 | CDFA_DEFAULT.prototype.nextState = function(state, c){ 37 | var next = 0; 38 | switch(state){ 39 | case 1: 40 | if((c < "\t" || "\n" < c) && (c < "\r" || "\r" < c) && (c < " " || " " < c) && (c < "%" || "%" < c) && 
(c < "/" || "9" < c) && (c < "=" || ">" < c) && (c < "A" || "Z" < c) && (c < "_" || "_" < c) && (c < "a" || "{" < c) ){ 41 | next = 2; 42 | } else if(("\t" === c ) || (" " === c )){ 43 | next = 3; 44 | } else if(("\n" === c ) || ("\r" === c )){ 45 | next = 3; 46 | } else if(("%" === c )){ 47 | next = 5; 48 | } else if(("/" === c )){ 49 | next = 6; 50 | } else if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "z") ){ 51 | next = 7; 52 | } else if(("=" === c )){ 53 | next = 8; 54 | } else if((">" === c )){ 55 | next = 9; 56 | } else if(("{" === c )){ 57 | next = 10; 58 | } 59 | break; 60 | case 3: 61 | if(("\t" <= c && c <= "\n") || ("\r" === c ) || (" " === c )){ 62 | next = 3; 63 | } 64 | break; 65 | case 5: 66 | if(("%" === c )){ 67 | next = 11; 68 | } else if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "z") ){ 69 | next = 12; 70 | } 71 | break; 72 | case 6: 73 | if(("*" === c )){ 74 | next = 13; 75 | } else if(("/" === c )){ 76 | next = 14; 77 | } 78 | break; 79 | case 7: 80 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "z") ){ 81 | next = 7; 82 | } 83 | break; 84 | case 12: 85 | if(("0" <= c && c <= "9") || ("A" <= c && c <= "Z") || ("_" === c ) || ("a" <= c && c <= "z") ){ 86 | next = 12; 87 | } 88 | break; 89 | } 90 | return next; 91 | }; 92 | 93 | function CDFA_BLOCKCOMMENT(){ 94 | this.ss=1; 95 | this.as=[2,3,4,5,6]; 96 | this.tt=[null,null,2,2,2,2,1]; 97 | this.stt={}; 98 | } 99 | CDFA_BLOCKCOMMENT.prototype= new CDFA_base(); 100 | CDFA_BLOCKCOMMENT.prototype.nextState = function(state, c){ 101 | var next = 0; 102 | switch(state){ 103 | case 1: 104 | if((c < "\n" || "\n" < c) && (c < "\r" || "\r" < c) && (c < "*" || "*" < c) ){ 105 | next = 2; 106 | } else if(("\n" === c )){ 107 | next = 2; 108 | } else if(("\r" === c )){ 109 | next = 2; 110 | } else if(("*" === c )){ 111 | next = 5; 112 | } 113 | break; 114 | case 5: 115 | if(("/" 
=== c )){ 116 | next = 6; 117 | } 118 | break; 119 | } 120 | return next; 121 | }; 122 | 123 | function CDFA_ACTIONBLOCK(){ 124 | this.ss=1; 125 | this.as=[1,2,3,4]; 126 | this.tt=[null,6,6,4,5]; 127 | this.stt={}; 128 | } 129 | CDFA_ACTIONBLOCK.prototype= new CDFA_base(); 130 | CDFA_ACTIONBLOCK.prototype.nextState = function(state, c){ 131 | var next = 0; 132 | switch(state){ 133 | case 1: 134 | if((c < "{" || "{" < c) && (c < "}" || "}" < c) ){ 135 | next = 2; 136 | } else if(("{" === c )){ 137 | next = 3; 138 | } else if(("}" === c )){ 139 | next = 4; 140 | } 141 | break; 142 | case 2: 143 | if((c < "{" || "{" < c) && (c < "}" || "}" < c) ){ 144 | next = 2; 145 | } 146 | break; 147 | } 148 | return next; 149 | }; 150 | 151 | function CDFA_LINECOMMENT(){ 152 | this.ss=1; 153 | this.as=[1,2,3]; 154 | this.tt=[null,8,8,9]; 155 | this.stt={}; 156 | } 157 | CDFA_LINECOMMENT.prototype= new CDFA_base(); 158 | CDFA_LINECOMMENT.prototype.nextState = function(state, c){ 159 | var next = 0; 160 | switch(state){ 161 | case 1: 162 | if((c < "\n" || "\n" < c) ){ 163 | next = 2; 164 | } else if(("\n" === c )){ 165 | next = 3; 166 | } 167 | break; 168 | case 2: 169 | if((c < "\n" || "\n" < c) ){ 170 | next = 2; 171 | } 172 | break; 173 | } 174 | return next; 175 | }; 176 | 177 | function CDFA_RE(){ 178 | this.ss=1; 179 | this.as=[1,2,3]; 180 | this.tt=[null,16,15,16]; 181 | this.stt={}; 182 | } 183 | CDFA_RE.prototype= new CDFA_base(); 184 | CDFA_RE.prototype.nextState = function(state, c){ 185 | var next = 0; 186 | switch(state){ 187 | case 1: 188 | if((c < "\t" || "\n" < c) && (c < "\r" || "\r" < c) && (c < " " || " " < c) ){ 189 | next = 2; 190 | } else if(("\t" <= c && c <= "\n") || ("\r" === c ) || (" " === c )){ 191 | next = 3; 192 | } 193 | break; 194 | case 2: 195 | if((c < "\t" || "\n" < c) && (c < "\r" || "\r" < c) && (c < " " || " " < c) ){ 196 | next = 2; 197 | } 198 | break; 199 | case 3: 200 | if(("\t" <= c && c <= "\n") || ("\r" === c ) || (" " === c )){ 201 | 
next = 3; 202 | } 203 | break; 204 | } 205 | return next; 206 | }; 207 | 208 | var EOF={}; 209 | function Lexer(){ 210 | 211 | if(!(this instanceof Lexer)) return new Lexer(); 212 | 213 | this.pos={line:0,col:0}; 214 | 215 | this.states={}; 216 | this.state = ['DEFAULT']; 217 | this.lastChar = '\n'; 218 | this.actions = [function (){this.pushState('BLOCKCOMMENT');},function (){this.popState();},function (){},function (){ 219 | this.pushState('ACTIONBLOCK'); this.blocklevel=1;this.func='';},function (){ 220 | this.blocklevel++;this.func+='{'},function (){ 221 | this.blocklevel--;if(this.blocklevel===0) { 222 | this.popState(); 223 | this.jjval = this.func; 224 | this.jjtext = this.func; 225 | return 'actionblock'; 226 | }else{ 227 | this.func+='}'; 228 | }},function (){ 229 | this.func+=this.jjtext; },function (){ 230 | this.pushState('LINECOMMENT');},function (){},function (){this.popState();},function () { 231 | this.jjval = (this.jjtext); 232 | return 'SEPARATOR'; 233 | },function () { 234 | this.jjval = this.jjtext.substring(1); 235 | return 'directive'; 236 | },function () { 237 | //this.jjval = this.jjtext.substring(1,this.jjtext.length-1); 238 | this.pushState('RE'); 239 | return this.jjtext; 240 | },function () { 241 | 242 | return 'id'; 243 | },function () { 244 | this.pushState('RE'); 245 | return this.jjtext; 246 | },function () { 247 | this.popState(); 248 | return 'regex'; 249 | },function () { 250 | //ignore spaces 251 | },function () { 252 | return this.jjtext; 253 | },function () { 254 | console.log('end of file'); 255 | return 'EOF'; 256 | }]; 257 | this.states["DEFAULT"] = {}; 258 | this.states["DEFAULT"].dfa = new CDFA_DEFAULT(); 259 | this.states["BLOCKCOMMENT"] = {}; 260 | this.states["BLOCKCOMMENT"].dfa = new CDFA_BLOCKCOMMENT(); 261 | this.states["ACTIONBLOCK"] = {}; 262 | this.states["ACTIONBLOCK"].dfa = new CDFA_ACTIONBLOCK(); 263 | this.states["LINECOMMENT"] = {}; 264 | this.states["LINECOMMENT"].dfa = new CDFA_LINECOMMENT(); 265 | 
this.states["RE"] = {}; 266 | this.states["RE"].dfa = new CDFA_RE(); 267 | } 268 | Lexer.prototype.setInput=function (input){ 269 | this.pos={row:0, col:0}; 270 | if(typeof input === 'string') 271 | {input = new StringReader(input);} 272 | this.input = input; 273 | this.state = ['DEFAULT']; 274 | this.lastChar='\n'; 275 | this.getDFA().reset(); 276 | return this; 277 | }; 278 | Lexer.prototype.nextToken=function () { 279 | 280 | 281 | var ret = undefined; 282 | while(ret === undefined){ 283 | this.resetToken(); 284 | ret = this.more(); 285 | } 286 | 287 | 288 | if (ret === EOF) { 289 | this.current = EOF; 290 | } else { 291 | this.current = {}; 292 | this.current.name = ret; 293 | this.current.value = this.jjval; 294 | this.current.lexeme = this.jjtext; 295 | this.current.position = this.jjpos; 296 | this.current.pos = {col: this.jjcol, line: this.jjline}; 297 | } 298 | return this.current; 299 | }; 300 | Lexer.prototype.resetToken=function (){ 301 | this.getDFA().reset(); 302 | this.getDFA().bol = (this.lastChar === '\n'); 303 | this.lastValid = undefined; 304 | this.lastValidPos = -1; 305 | this.jjtext = ''; 306 | this.remains = ''; 307 | this.buffer = ''; 308 | this.jjline = this.input.line; 309 | this.jjcol = this.input.col; 310 | }; 311 | Lexer.prototype.halt=function () { 312 | if (this.lastValidPos >= 0) { 313 | this.jjtext = this.buffer.substring(0, this.lastValidPos + 1); 314 | this.remains = this.buffer.substring(this.lastValidPos + 1); 315 | this.jjval = this.jjtext; 316 | this.jjpos = this.lastValidPos + 1-this.jjtext.length; 317 | this.input.rollback(this.remains); 318 | var action = this.getAction(this.lastValid); 319 | if (typeof ( action) === 'function') { 320 | return action.call(this); 321 | } 322 | this.resetToken(); 323 | } 324 | else if(!this.input.more()){//EOF 325 | var actionid = this.states[this.getState()].eofaction; 326 | if(actionid){ 327 | action = this.getAction(actionid); 328 | if (typeof ( action) === 'function') { 329 | //Note we 
don't care of returned token, must return 'EOF' 330 | action.call(this); 331 | } 332 | } 333 | return EOF; 334 | } else {//Unexpected character 335 | throw new Error('Unexpected char \''+this.input.peek()+'\' at '+this.jjline +':'+this.jjcol); 336 | } 337 | }; 338 | Lexer.prototype.more=function (){ 339 | var ret; 340 | while (this.input.more()) { 341 | var c = this.input.peek(); 342 | this.getDFA().readSymbol(c); 343 | if (this.getDFA().isInDeadState()) { 344 | 345 | ret = this.halt(); 346 | return ret; 347 | 348 | } else { 349 | if (this.getDFA().isAccepting()) { 350 | this.lastValid = this.getDFA().getCurrentToken(); 351 | this.lastValidPos = this.input.getPos(); 352 | 353 | } 354 | this.buffer = this.buffer + c; 355 | this.lastChar = c; 356 | this.input.next(); 357 | } 358 | 359 | } 360 | ret = this.halt(); 361 | return ret; 362 | }; 363 | Lexer.prototype.less=function (length){ 364 | this.input.rollback(length); 365 | }; 366 | Lexer.prototype.getDFA=function (){ 367 | return this.states[this.getState()].dfa; 368 | }; 369 | Lexer.prototype.getAction=function (i){ 370 | return this.actions[i]; 371 | }; 372 | Lexer.prototype.pushState=function (state){ 373 | this.state.push(state); 374 | this.getDFA().reset(); 375 | }; 376 | Lexer.prototype.popState=function (){ 377 | if(this.state.length>1) { 378 | this.state.pop(); 379 | this.getDFA().reset(); 380 | } 381 | }; 382 | Lexer.prototype.getState=function (){ 383 | return this.state[this.state.length-1]; 384 | }; 385 | Lexer.prototype.restoreLookAhead=function (){ 386 | this.tailLength = this.jjtext.length; 387 | this.popState(); 388 | this.less(this.tailLength); 389 | this.jjtext = this.lawhole.substring(0,this.lawhole.length-this.tailLength); 390 | 391 | 392 | }; 393 | Lexer.prototype.evictTail=function (length){ 394 | this.less(length); 395 | this.jjtext = this.jjtext.substring(0,this.jjtext.length-length); 396 | }; 397 | Lexer.prototype.isEOF=function (o){ 398 | return o===EOF; 399 | } 400 | ; 401 | function 
StringReader(str){ 402 | if(!(this instanceof StringReader)) return new StringReader(str); 403 | this.str = str; 404 | this.pos = 0; 405 | this.line = 0; 406 | this.col = 0; 407 | } 408 | StringReader.prototype.getPos=function (){ 409 | return this.pos; 410 | }; 411 | StringReader.prototype.peek=function () 412 | { 413 | //TODO: handle EOF 414 | return this.str.charAt(this.pos); 415 | }; 416 | StringReader.prototype.eat=function (str) 417 | { 418 | var istr = this.str.substring(this.pos,this.pos+str.length); 419 | if(istr===str){ 420 | this.pos+=str.length; 421 | this.updatePos(str,1); 422 | } else { 423 | throw new Error('Expected "'+str+'", got "'+istr+'"!'); 424 | } 425 | }; 426 | StringReader.prototype.updatePos=function (str,delta){ 427 | for(var i=0;i0){ 27 | max = this.operators[this.operators.length-1][2]; 28 | } 29 | max = max + 100; 30 | for(var i=0;i\/\* {this.pushState('BLOCKCOMMENT');} 8 | \*\/ {this.popState();} 9 | (\n|\r|.) {} 10 | 11 | <>\/\/ {this.pushState('LINECOMMENT');} 12 | [^\n]* {} 13 | \n {this.popState();} 14 | 15 | <>%left|%right|%nonassoc { this.jjval = this.jjtext.substring(1); return 'Op';} 16 | <>%\w+ {this.jjval = this.jjtext.substring(1); return 'Directive'; } 17 | <>'[^'|\']+' { this.jjval = this.jjtext.substring(1,this.jjtext.length-1); return 'Terminal'; } 18 | <>function {this.func=this.jjtext;this.blocklevel=0; this.pushState('FUNCTION');} 19 | [^\{\}]* {this.func+=this.jjtext;} 20 | [{] {this.func+=this.jjtext; this.blocklevel++} 21 | [}] {this.func+=this.jjtext; this.blocklevel--; if(this.blocklevel===0) {this.popState(); this.jjtext = this.jjval = this.func; return 'function'; }} 22 | <>\w+ { return 'id'; } 23 | <>\s { //mah } 24 | <>. { return this.jjtext; } -------------------------------------------------------------------------------- /lib/parser/lexgram.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by gcannata on 22/08/2014. 
3 | */ 4 | 5 | (function() { 6 | var JacobLexGrammar = { 7 | tokens: ['SEPARATOR', 'directive','regex', '=','state','id','actionblock','<','>',','], 8 | 9 | productions: [ 10 | ['LexPec', [parser.Repeat('Directive'),'SEPARATOR', 11 | parser.Repeat('Definition'),'SEPARATOR', 12 | parser.Repeat('TokenRule')], 13 | function (directives,_1, definitions,_2, rules) { 14 | 15 | } 16 | ], 17 | ['Directive', ['directive', 'id'], 18 | function (d, id) { 19 | this[d] = id; 20 | } 21 | ], 22 | 23 | ['Definition', ['id','=', 'regex'], 24 | function (def, _, re) { 25 | this.definitions = this.definitions || {}; 26 | this.definitions[def] = re; 27 | } 28 | ], 29 | ['TokenRule', ['StatesList',parser.Group(['regex'],['id']), parser.Optional('actionblock')], 30 | function (state, re, action) { 31 | if((typeof state != 'undefined') && state.length===0){ 32 | state = undefined; 33 | } 34 | this.tokens = this.tokens || []; 35 | var rule = {}; 36 | rule.regexp = re; 37 | rule.state = state; 38 | rule.action = undefined; 39 | if( (typeof action != 'undefined') && action.length>0){ 40 | try { 41 | rule.action = new Function(action) 42 | }catch(e){ 43 | throw Error(e.toString() + ' in rule ' + this.tokens.length+1); 44 | } 45 | } 46 | this.tokens.push(rule); 47 | } 48 | ], 49 | ['StatesList',['<',parser.Optional('id',parser.Repeat(',','id')),'>'],function(_,list){ 50 | //StatesList 51 | return env.junq(list).flatmap().odd().toArray(); 52 | }] 53 | 54 | ], 55 | moduleName: 'JacobLexInterpreter' 56 | 57 | }; 58 | return JacobLexGrammar; 59 | })(); -------------------------------------------------------------------------------- /lib/parser/lexlex.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by gcannata on 22/08/2014. 
3 | */ 4 | 5 | (function() { 6 | var tokenspecs = { 7 | tokens: [ 8 | 9 | {'regexp': '\\/\\*', action: function(){this.pushState('BLOCKCOMMENT');}},//0 10 | {'regexp': '\\*\\/', action: function(){this.popState();}, state:'BLOCKCOMMENT'},//1 11 | {'regexp': '(\\n|\\r|.)', action: function(){}, state:'BLOCKCOMMENT'},//2 12 | {'regexp': '\\{', action: function(){ 13 | this.pushState('ACTIONBLOCK'); this.blocklevel=1;this.func='';}},//3 14 | {'regexp': '\\{', action: function(){ 15 | this.blocklevel++;this.func+='{'}, state:'ACTIONBLOCK'},//3 16 | 17 | {'regexp': '\\}', action: function(){ 18 | this.blocklevel--;if(this.blocklevel===0) { 19 | this.popState(); 20 | this.jjval = this.func; 21 | this.jjtext = this.func; 22 | return 'actionblock'; 23 | }else{ 24 | this.func+='}'; 25 | }}, state:'ACTIONBLOCK'},//4 26 | {'regexp': '[^\\}\\{]*', action: function(){ 27 | this.func+=this.jjtext; }, state:'ACTIONBLOCK'},//5 28 | {'regexp': '\\/\\/', action: function(){ 29 | this.pushState('LINECOMMENT');}},//6 30 | {'regexp': '[^\\n]*', action: function(){}, state:'LINECOMMENT'},//7 31 | {'regexp': '\\n', action: function(){this.popState();}, state:'LINECOMMENT'},//8 32 | //TODO: line comment 33 | {'regexp': '%%', action: function () { 34 | this.jjval = (this.jjtext); 35 | return 'SEPARATOR'; 36 | }}, 37 | {'regexp': '%\\w+', action: function () { 38 | this.jjval = this.jjtext.substring(1); 39 | return 'directive'; 40 | }}, 41 | {'regexp': '>', action: function () { 42 | //this.jjval = this.jjtext.substring(1,this.jjtext.length-1); 43 | this.pushState('RE'); 44 | return this.jjtext; 45 | }}, 46 | {'regexp': '\\w+', action: function () { 47 | 48 | return 'id'; 49 | }}, 50 | { 'regexp': '=', action: function () { 51 | this.pushState('RE'); 52 | return this.jjtext; 53 | }}, 54 | {'regexp': '[^\\s\\n\\r]+', action: function () { 55 | this.popState(); 56 | return 'regex'; 57 | }, state:'RE'}, 58 | { 'regexp': '\\s*', action: function () { 59 | //ignore spaces 60 | 
},state:['RE','DEFAULT']}, 61 | { 'regexp': '.', action: function () { 62 | return this.jjtext; 63 | }}, 64 | { 'regexp': '$', action: function () { 65 | console.log('end of file'); 66 | return 'EOF'; 67 | }} 68 | ], 69 | moduleName: 'JacobLexerLexer' 70 | }; 71 | return tokenspecs; 72 | })(); 73 | -------------------------------------------------------------------------------- /lib/regex.js: -------------------------------------------------------------------------------- 1 | var junq = junq || require('junq'); 2 | var sets = sets || require('junq/sets'); 3 | var StringReader = StringReader || require('./stringreader'); 4 | var automata = automata || require('./automata'); 5 | //TODO: negation in ranges 6 | //TODO: multiple ranges inside squares 7 | 8 | var regex; 9 | (function (regex,dfa, StringReader, undefined) { 10 | 11 | 12 | function RegPart() { 13 | 14 | } 15 | 16 | RegPart.prototype.getPrecedence = function () { 17 | throw new Error('Should not evaluate this!'); 18 | }; 19 | RegPart.prototype.toNFA = function () { 20 | throw new Error('Should not evaluate this!'); 21 | }; 22 | RegPart.prototype.matches = function (str) { 23 | return this.toNFA().matches(str); 24 | }; 25 | RegPart.prototype.isEmpty = function () { 26 | return false; 27 | }; 28 | RegPart.prototype.isLookAhead = function () { 29 | return false; 30 | }; 31 | 32 | RegPart.prototype.getMinMaxLength = function () { 33 | return {min:0, max:Infinity}; 34 | }; 35 | 36 | 37 | 38 | function Empty() { 39 | 40 | } 41 | 42 | Empty.prototype = new RegPart(); 43 | Empty.prototype.getPrecedence = function () { 44 | return 3; 45 | }; 46 | Empty.prototype.isEmpty = function () { 47 | return true; 48 | }; 49 | Empty.prototype.toNFA = function () { 50 | var start = new dfa.State(undefined, 'EmptyStart'); 51 | var accept = new dfa.State(undefined, 'EmptyAceppt'); 52 | 53 | var rb = new dfa.NDRuleBook([new dfa.Rule(start, dfa.eps, accept)]); 54 | 55 | var specs = {rulebook: rb, acceptstates: [accept], startstate: 
start}; 56 | 57 | return new dfa.NFA(specs); 58 | }; 59 | 60 | Empty.prototype.toString = function () { 61 | return ''; 62 | }; 63 | 64 | Empty.prototype.getMinMaxLength = function () { 65 | return {min:0, max:0}; 66 | }; 67 | 68 | regex.Empty = Empty; 69 | 70 | 71 | 72 | function Character(character) { 73 | this.character = character; 74 | } 75 | 76 | Character.prototype = new RegPart(); 77 | Character.prototype.getPrecedence = function () { 78 | return 3; 79 | }; 80 | 81 | Character.prototype.toString = function () { 82 | return this.character.toString(); 83 | }; 84 | Character.prototype.toNFA = function () { 85 | var newStart = new dfa.State(undefined, 'start \'' + this.character + '\''); 86 | var newEnd = new dfa.State(undefined, 'stop \'' + this.character + '\''); 87 | var accepting = [newEnd]; 88 | //var rb = new dfa.NDRuleBook([new dfa.Rule(newStart, new dfa.InputChar(this.character), newEnd)]); 89 | var rb = new dfa.NDRuleBook( 90 | [new dfa.Rule(newStart, new dfa.InputRange(this.character,this.character), newEnd)] 91 | ); 92 | var specs = {rulebook:rb, acceptstates:accepting, startstate:newStart}; 93 | return new dfa.NFA(specs); 94 | }; 95 | 96 | Character.prototype.getMinMaxLength = function () { 97 | return {min:1, max:1}; 98 | }; 99 | 100 | regex.Character = Character; 101 | 102 | function CharactersRange(from,to) { 103 | this.from=from; 104 | this.to = to; 105 | //note: it will put itself as input! 
106 | this.character = this; 107 | } 108 | 109 | 110 | 111 | CharactersRange.prototype = new Character(); 112 | 113 | CharactersRange.prototype.setNegate = function(negate){ 114 | this.negate = negate; 115 | if(this.next) 116 | { 117 | this.next.setNegate(negate); 118 | } 119 | }; 120 | 121 | CharactersRange.prototype.getPrecedence = function () { 122 | return 3; 123 | }; 124 | 125 | CharactersRange.prototype.append = function (range) { 126 | if(!this.next) this.next = range; 127 | else this.next.append(range); 128 | }; 129 | 130 | 131 | CharactersRange.prototype.toString = function () { 132 | return '[' + this.toStringInternal() 133 | + ']'; 134 | }; 135 | CharactersRange.prototype.toStringInternal = function () { 136 | var str = this.negate ? '^' : ''; 137 | str = str + ((this.from < this.to) ? this.from + '-' + this.to : this.from); 138 | if(this.next) return str+this.next.toStringInternal(); 139 | return str; 140 | }; 141 | 142 | CharactersRange.prototype.toNFA = function () { 143 | var newStart = new dfa.State(undefined, 'start reading\'' + this.character + '\''); 144 | var newEnd = new dfa.State(undefined, 'read\'' + this.character + '\''); 145 | var accepting = [newEnd]; 146 | var base; 147 | var cr = this, ir = base = {}, negate = this.negate; 148 | while(cr){ 149 | if(!negate){ 150 | ir.next = new dfa.InputRange(cr.from,cr.to); 151 | } else { 152 | //it's a complement range, we split into its two positive ones 153 | //TODO: guard aginst going over range 154 | /* 155 | var lower = new dfa.InputRange(dfa.FIRSTCHAR,String.fromCharCode((cr.from.charCodeAt(0)-1)),negate); 156 | var upper = new dfa.InputRange(String.fromCharCode((cr.to.charCodeAt(0)+1)),dfa.LASTCHAR,negate); 157 | ir.next = lower; 158 | lower.next = upper; 159 | ir = lower; 160 | */ 161 | ir.next = new dfa.InputRange(cr.from,cr.to,negate); 162 | } 163 | cr = cr.next; 164 | ir=ir.next 165 | } 166 | 167 | var rb = new dfa.NDRuleBook([new dfa.Rule(newStart, base.next, newEnd)]); 168 | var specs = 
{rulebook:rb, acceptstates:accepting, startstate:newStart}; 169 | return new dfa.NFA(specs); 170 | }; 171 | 172 | regex.CharactersRange = CharactersRange; 173 | 174 | function Composite() { 175 | 176 | } 177 | 178 | Composite.prototype = new RegPart(); 179 | Composite.prototype.printSubExp = function (subexp) { 180 | if (this.getPrecedence() > subexp.getPrecedence()) { 181 | return '(' + subexp.toString() + ')'; 182 | } else { 183 | return subexp.toString(); 184 | } 185 | }; 186 | 187 | function Concat(first, second) { 188 | this.first = first; 189 | this.second = second; 190 | } 191 | 192 | Concat.prototype = new Composite(); 193 | 194 | 195 | Concat.prototype.getPrecedence = function () { 196 | return 1; 197 | }; 198 | Concat.prototype.toString = function () { 199 | return this.printSubExp(this.first) + this.printSubExp(this.second); 200 | }; 201 | 202 | 203 | Concat.prototype.toNFA = function () { 204 | 205 | var firstNFA = this.first.toNFA(); 206 | if(this.second.toNFA == undefined){ 207 | debugger; 208 | } 209 | 210 | var secondNFA = this.second.toNFA(); 211 | 212 | if (this.first.isEmpty()) return secondNFA; 213 | if (this.second.isEmpty()) return firstNFA; 214 | var startstate = firstNFA.startstate; 215 | var acceptstates = secondNFA.acceptstates; 216 | var newRules = junq(firstNFA.acceptstates).map(function (state) { 217 | return new dfa.Rule(state, dfa.eps, secondNFA.startstate); 218 | }); //no need to have an array here 219 | 220 | 221 | var rules = junq(firstNFA.getRules()).append(newRules).append(secondNFA.getRules()).toArray(); 222 | var rb = new dfa.NDRuleBook(rules); 223 | var specs = {rulebook:rb, acceptstates:acceptstates, startstate:startstate}; 224 | var nfa = new dfa.NFA(specs); 225 | return nfa; 226 | }; 227 | 228 | Concat.prototype.getMinMaxLength = function () { 229 | var firstMinMax = this.first.getMinMaxLength(); 230 | var secondMinMax = this.second.getMinMaxLength(); 231 | return {min:firstMinMax.min+secondMinMax.min, 
max:firstMinMax.max+secondMinMax.max}; 232 | }; 233 | 234 | regex.Concat = Concat; 235 | 236 | function Choice(either, or) { 237 | this.either = either; 238 | this.or = or; 239 | } 240 | 241 | Choice.prototype = new Composite(); 242 | 243 | Choice.prototype.getPrecedence = function () { 244 | return 0; 245 | }; 246 | Choice.prototype.toString = function () { 247 | return this.printSubExp(this.either) + '|' + this.printSubExp(this.or); 248 | }; 249 | 250 | //TODO: check for empty to optimize 251 | Choice.prototype.toNFA = function () { 252 | var eitherNFA = this.either.toNFA(); 253 | var orNFA = this.or.toNFA(); 254 | var start = new dfa.State(undefined, 'Choice start'); 255 | //var accept = new dfa.State(undefined, 'Choice accept'); 256 | var newRules = junq([ 257 | new dfa.Rule(start, dfa.eps, eitherNFA.startstate), 258 | new dfa.Rule(start, dfa.eps, orNFA.startstate) 259 | ]) 260 | .append(eitherNFA.getRules()) 261 | .append(orNFA.getRules()) 262 | 263 | .toArray(); 264 | var acceptstates = eitherNFA.acceptstates.union(orNFA.acceptstates); 265 | var specs = {rulebook:new dfa.NDRuleBook(newRules), acceptstates:acceptstates, startstate:start}; 266 | var nfa = new dfa.NFA(specs); 267 | return nfa; 268 | }; 269 | 270 | Choice.prototype.getMinMaxLength = function () { 271 | var eitherMinMax = this.either.getMinMaxLength(); 272 | var orMinMax = this.or.getMinMaxLength(); 273 | return {min:(eitherMinMax.minorMinMax.max?eitherMinMax.max:orMinMax.max)}; 275 | }; 276 | 277 | regex.Choice = Choice; 278 | 279 | function Repeat(exp, pattern) { 280 | this.exp = exp; 281 | this.pattern = pattern || '*'; 282 | } 283 | 284 | Repeat.prototype = new Composite(); 285 | Repeat.prototype.getPrecedence = function () { 286 | return 2; 287 | }; 288 | 289 | Repeat.prototype.toString = function () { 290 | return this.printSubExp(this.exp) + this.pattern; 291 | }; 292 | 293 | Repeat.prototype.toNFA = function () { 294 | var expNFA = this.exp.toNFA(); 295 | 296 | 297 | var start = new 
dfa.State(); 298 | var accept = expNFA.acceptstates; 299 | if(this.pattern==='*') accept=accept.union(start); 300 | var rules = 301 | junq(expNFA.getRules()) 302 | .append( 303 | 304 | new dfa.Rule(start, dfa.eps, expNFA.startstate) 305 | ) 306 | .append(junq(expNFA.acceptstates).map(function(as){ 307 | return new dfa.Rule(as,dfa.eps,expNFA.startstate); 308 | })) 309 | .toArray(); 310 | var rb = new dfa.NDRuleBook(rules); 311 | var specs = {rulebook:rb, acceptstates:accept, startstate:start}; 312 | var nfa = new dfa.NFA(specs); 313 | return nfa; 314 | }; 315 | 316 | Repeat.prototype.getMinMaxLength = function () { 317 | 318 | return {min:(this.pattern==='+'?this.exp.getMinMaxLength().min:0), max:Infinity}; 319 | }; 320 | 321 | regex.Repeat = Repeat; 322 | 323 | function ZeroOrOne(exp) { 324 | this.pattern = '?'; 325 | this.exp = exp; 326 | 327 | 328 | } 329 | 330 | ZeroOrOne.prototype = new Repeat(); 331 | 332 | ZeroOrOne.prototype.toNFA = function () { 333 | 334 | var expNFA = this.exp.toNFA(); 335 | var start = expNFA.startstate; 336 | 337 | var accept = expNFA.acceptstates; 338 | 339 | var newRules = junq(expNFA.getRules()) 340 | .append(junq(expNFA.acceptstates).map(function (state) { 341 | return new dfa.Rule(start, dfa.eps, state); 342 | }) 343 | ) 344 | .toArray(); 345 | var specs = {rulebook:new dfa.NDRuleBook(newRules), acceptstates:accept, startstate:start}; 346 | var nfa = new dfa.NFA(specs); 347 | return nfa; 348 | }; 349 | 350 | ZeroOrOne.prototype.getMinMaxLength = function () { 351 | return {min:0, max:this.exp.getMinMaxLength().max}; 352 | }; 353 | 354 | function Interval(base, from, to) { 355 | this.exp = base; 356 | this.from = from=this.from){ 394 | accept = accept.union(expNFA.acceptstates); 395 | } 396 | 397 | rules = rules.concat(expNFA.getRules()); 398 | rules = rules.concat(junq(previous).map( 399 | function(prevState){ 400 | return new dfa.Rule(prevState, dfa.eps, expNFA.startstate); 401 | } 402 | ).toArray()); 403 | 404 | if(i===this.from 
&& this.to===Infinity){ 405 | rules = rules.concat(junq(expNFA.acceptstates).map( 406 | function(as){ 407 | return new dfa.Rule(as, dfa.eps, expNFA.startstate); 408 | } 409 | ).toArray()); 410 | } 411 | 412 | previous = expNFA.acceptstates; 413 | 414 | } 415 | 416 | 417 | 418 | var rb = new dfa.NDRuleBook(rules); 419 | var specs = {rulebook:rb, acceptstates:accept, startstate:start}; 420 | var nfa = new dfa.NFA(specs); 421 | return nfa; 422 | }; 423 | 424 | Interval.prototype.getMinMaxLength = function () { 425 | var explength = this.exp.getMinMaxLength(); 426 | return {min: this.from*explength.min, max:this.to*explength.max}; 427 | }; 428 | 429 | 430 | function LookAhead(head,tail){ 431 | this.first = head; 432 | this.second = tail; 433 | } 434 | 435 | LookAhead.prototype = new Concat(); 436 | LookAhead.prototype.toString = function () { 437 | return this.first.toString() + '/' + this.second.toString(); 438 | }; 439 | LookAhead.prototype.isLookAhead = function () { 440 | return true; 441 | }; 442 | 443 | function parseRegExp(str) { 444 | if(str==='$') return regex.EOF; 445 | var sr = new StringReader(str); 446 | var bol = false; 447 | if(sr.peek()=='^'){ 448 | sr.eat('^'); 449 | bol=true; 450 | } 451 | var ret = parseLookAhead(sr); 452 | if(bol) ret.bol = bol; 453 | return ret; 454 | } 455 | 456 | function parseLookAhead(input){ 457 | var head = parseRE(input); 458 | var c = input.peek(); 459 | var tail; 460 | if (input.more() && c === '/' || c==='$') { 461 | switch(c){ 462 | case '/': 463 | input.eat('/'); 464 | tail = parseRE(input); 465 | break; 466 | case '$': 467 | input.eat('$'); 468 | tail = parseRegExp('\r|\n'); 469 | break; 470 | } 471 | return new LookAhead(head, tail); 472 | } else { 473 | return head; 474 | } 475 | 476 | } 477 | 478 | function parseRE(input) { 479 | 480 | var term = parseTerm(input); 481 | 482 | if (input.more() && input.peek() === '|') { 483 | input.eat('|'); 484 | var term2 = parseRE(input); 485 | return new Choice(term, term2); 486 
| } else { 487 | return term; 488 | } 489 | } 490 | 491 | function parseTerm(input) { 492 | var factor = new Empty(); 493 | 494 | while (input.more() && input.peek() !== ')' && input.peek() !== '|' && input.peek() !== '/' && input.peek() !== '$') { 495 | var nextFactor = parseFactor(input); 496 | factor = new Concat(factor, nextFactor); 497 | } 498 | 499 | return factor; 500 | } 501 | 502 | function parseFactor(input) { 503 | var base = parseAtom(input); 504 | 505 | while (input.more() && 506 | (input.peek() === '*' || input.peek() == '+' || input.peek() == '?' || input.peek()=='{')) { 507 | var pattern = input.next(); 508 | if (pattern === '?') { 509 | base = new ZeroOrOne(base); 510 | } else if (pattern === '{'){ 511 | base = parseInterval(base,input); 512 | } else { 513 | base = new Repeat(base, pattern); 514 | } 515 | 516 | } 517 | 518 | return base; 519 | } 520 | 521 | function parseAtom(input) { 522 | var range; 523 | switch (input.peek()) { 524 | case '(': 525 | input.eat('('); 526 | var r = parseRE(input); 527 | input.eat(')'); 528 | return r; 529 | case '[': 530 | return parseCharacterClass(input); 531 | case '.': 532 | input.eat('.'); 533 | return DOT(); 534 | case '\\': 535 | return parseAtomEscape(input); 536 | 537 | default: 538 | return parseCharacter(input); 539 | } 540 | } 541 | 542 | function parseCharacterClass(input){ 543 | "use strict"; 544 | input.eat('['); 545 | var negate = false; 546 | var range; 547 | if(input.peek() === '^') { 548 | input.eat('^'); 549 | negate = true; 550 | } 551 | 552 | do{ 553 | var r = parseRange(input); 554 | if(!(r instanceof(CharactersRange))){ 555 | r = r.second; 556 | } 557 | r.setNegate(negate); 558 | if(!range){ 559 | range = r; 560 | } 561 | else { 562 | range.append(r); 563 | } 564 | } 565 | while(input.peek()!=']'); 566 | input.eat(']'); 567 | return range; 568 | } 569 | 570 | function parseAtomEscape(input){ 571 | input.eat('\\'); 572 | var c = input.next(); 573 | "use strict"; 574 | switch(c){ 575 | case 
'd': 576 | return DIGIT(); 577 | break; 578 | case 'D': 579 | return NOTDIGIT(); 580 | break; 581 | case 's': 582 | return SPACE(); 583 | break; 584 | case 'S': 585 | return NOTSPACE(); 586 | break; 587 | case 'w': 588 | return WORD(); 589 | break; 590 | case 'W': 591 | return NOTWORD(); 592 | break; 593 | default: 594 | return new Character(parseCharacterEscape(c, input)); 595 | } 596 | return new Character(c); 597 | } 598 | 599 | function parseCharacterEscape(i,input){ 600 | "use strict"; 601 | var c; 602 | switch(i){ 603 | case 'r': 604 | c = '\r'; 605 | break; 606 | case 'n': 607 | c = '\n'; 608 | break; 609 | case 'f': 610 | c = '\f'; 611 | break; 612 | case 't': 613 | c = '\t'; 614 | break; 615 | case 'x': 616 | var hex = input.next()+input.next(); 617 | c = String.fromCharCode(parseInt(hex,16)); 618 | break; 619 | case 'u': 620 | hex = input.next()+input.next()+input.next()+input.next(); 621 | c = String.fromCharCode(parseInt(hex,16)); 622 | break; 623 | default: 624 | c = i; 625 | break; 626 | } 627 | return c; 628 | } 629 | 630 | function parseInterval(base, input){ 631 | var nstr = ''; 632 | 633 | while(input.peek()!==','&&input.peek()!=='}'){ 634 | nstr+=input.peek(); 635 | input.next(); 636 | } 637 | var n1 = parseInt(nstr) || 0; 638 | nstr=''; 639 | if(input.peek() === ','){ 640 | input.next(); 641 | 642 | while(input.peek()!=='}'){ 643 | nstr+=input.peek(); 644 | input.next(); 645 | } 646 | 647 | var n2 = parseInt(nstr) || Infinity; 648 | input.next(); 649 | return new Interval(base, n1, n2); 650 | } 651 | else { 652 | input.next(); 653 | return new Interval(base,n1,n1); 654 | } 655 | } 656 | 657 | var DIGIT = function(){return new CharactersRange('0', '9')}; 658 | var NOTDIGIT = function(){return parseRegExp("[^0-9]")}; 659 | var SPACE = function(){return parseRegExp("[ \\t\\r\\n\xA0]")}; 660 | var NOTSPACE = function(){return parseRegExp("[^ \\t\\r\\n\xA0]")}; 661 | var WORD = function(){return parseRegExp('[a-zA-Z0-9_]')}; 662 | var NOTWORD = 
function(){return parseRegExp('[^a-zA-Z0-9_]')}; 663 | //TODO: dot is not working right 664 | var DOT = function(){return parseRegExp('[^\\r\\n]')}; 665 | 666 | function parseCharacter(input){ 667 | var c = input.next(); 668 | return new Character(c); 669 | } 670 | 671 | function parseClassCharacter(input){ 672 | var c = input.peek(); 673 | if(c!='\\'){ 674 | input.eat(c); 675 | return c; 676 | } 677 | input.eat('\\'); 678 | //c = input.next(); 679 | return parseAtomEscape(input); 680 | } 681 | 682 | function parseClassAtom(input){ 683 | switch (input.peek()) { 684 | case '\\': 685 | return parseAtomEscape(input); 686 | default: 687 | return parseCharacter(input); 688 | } 689 | } 690 | 691 | function parseRange(input){ 692 | 693 | var range; 694 | 695 | var from = parseClassAtom(input); 696 | if(!from.character)//is this a range? 697 | return from; 698 | from = from.character; 699 | if(input.peek()==='-') 700 | { 701 | input.eat('-'); 702 | var to = parseClassCharacter(input); 703 | range = new CharactersRange(from,to); 704 | } 705 | 706 | else{ 707 | range = new CharactersRange(from,from) 708 | } 709 | //range.negate = negate; 710 | return range; 711 | } 712 | 713 | 714 | 715 | regex.parseRegExp = parseRegExp; 716 | regex.EOF = new RegPart(); 717 | 718 | })(regex || (regex = {}), automata,StringReader); 719 | 720 | if (typeof(module) !== 'undefined') { module.exports = regex; } 721 | -------------------------------------------------------------------------------- /lib/stringreader.js: -------------------------------------------------------------------------------- 1 | var StringReader= 2 | (function (sr,undefined) { 3 | var StringReader = function StringReader(str){ 4 | if(!(this instanceof StringReader)) return new StringReader(str); 5 | this.str = str; 6 | this.pos = 0; 7 | this.line = 0; 8 | this.col = 0; 9 | }; 10 | 11 | StringReader.prototype.getPos = function(){ 12 | return this.pos; 13 | }; 14 | 15 | StringReader.prototype.peek = function() 16 | { 17 | 
//TODO: handle EOF 18 | return this.str.charAt(this.pos); 19 | }; 20 | 21 | StringReader.prototype.eat = function(str) 22 | { 23 | var istr = this.str.substring(this.pos,this.pos+str.length); 24 | if(istr===str){ 25 | this.pos+=str.length; 26 | this.updatePos(str,1); 27 | } else { 28 | throw new Error('Expected "'+str+'", got "'+istr+'"!'); 29 | } 30 | }; 31 | 32 | StringReader.prototype.updatePos = function(str,delta){ 33 | for(var i=0;i{digits}*\.{digits}+ { 49 | this.jjval = parseFloat(this.jjtext); 50 | return 'float'; 51 | } 52 | 53 | <>{digits}+ { 54 | this.jjval = parseInt(this.jjtext); 55 | return 'integer'; 56 | } 57 | 58 | <>print { 59 | return 'print'; 60 | } 61 | 62 | <>\w+ { return 'id'; } 63 | 64 | <>\s* { } 65 | 66 | <>. { return this.jjtext; } 67 | 68 | <>$ { console.log('EOF'); return 'EOF'; } 69 | ``` 70 | 71 | The syntax is similar to Flex's, with some differences. 72 | The file is split in three areas, separated by a double percent. In the first area are the directives. The only currently supported is %moduleName, which allow you to specify the name of the generated module. 73 | 74 | The second section contains definitions, that allows you to assign names to regular expressions. 75 | 76 | The third section contains the actual ules. In order to recognize a token, you specify the regular expression that matches it, and then assign it an action. 77 | 78 | Take for example the following: 79 | ```[JavaSCript] 80 | <>\w+ { return 'id'; } 81 | ``` 82 | 83 | The double angled brakets are used to specify (optional) starting state of the rule (more on that later), in this case the rule is active in the DEFAULT state. 84 | The regular expression `\w+` matches one or more alphanumeric chars. 85 | The associated action (between curly braces) is a javascript function that should return the name of the matched token. 86 | This name is the name that can then be used in the grammar file. 
87 | 88 | Regular Expressions Syntax 89 | ======================= 90 | 91 | Jacob implements most, if not all, the regular expressions mechanism found in most lexer, including forward lookahead. 92 | Here is a summary: 93 | 94 | | pattern | description | 95 | |---------|-------------| 96 | | x | matches character 'x' | 97 | | . | matches any character except newline | 98 | | [xyz] | this is a character class: it matches either 'x','y' or 'z' | 99 | | [a-f] | character class with range: it matches every character from 'a' to 'f' included | 100 | | [^a-f] | range negation: matches everything BUT 'a'-'f' | 101 | | r* | matches 0 or more times the regular expression r | 102 | | r+ | matches 1 or more times r | 103 | | r? | matches 0 or 1 r | 104 | | r{2,5} | matches from 2 to 5 r | 105 | | r{2,} | matches 2 or more r | 106 | | r{,5} | matches from 0 to 5 r | 107 | | r{4} | matches r exactly 4 times | 108 | | {digits} | matches the definition named 'digits' | 109 | | \X | '\' is the escape character, can be used to insert character like '\n','\r','\t' or to escape regex special character like \* | 110 | | \x2a | matches character with hex code 2a | 111 | | \u2103 | matches unicode character U+2103 | 112 | | rs | the regular expression r followed by the regular expresson s | 113 | | r|s | either r or s | 114 | | r/s | lookahead: matches r only if it is followed by s | 115 | | ^r | matches r only at the beginning of a line | 116 | | r$ | matches r only at the end of a line | 117 | | ab(cd)* | matches ab, abcd, abcdcd, abcdcdcd etc. | 118 | 119 | 120 | 121 | 122 | Lexer Actions 123 | ============ 124 | In the actions you should specify what the lexer should do after recognizing a token. The simplest action is the empty one; 125 | 126 | `<>\s* { }` 127 | 128 | This is useful to ignore a given input. 
Since the action won't return any token name, the lexer will continue processing the input without outputting any token for the matched content, thus in fact ignoring that input. In the example above the whitespace is ignored. 129 | 130 | Another common situation is having to parse the input to have a meaningful token: 131 | ```[JavaScript] 132 | <>{digits}+ { 133 | this.jjval = parseInt(this.jjtext); 134 | return 'integer'; 135 | } 136 | ``` 137 | Inside actions, this points to the lexer itself. In the lexer `jjtext` contains the text that the regular expression matched. `jjval` by default contains the same text as`jjtext` but you can change it inside an action. In the example above the text is parsed to get an integer value, which is then stored in `jjval`. 138 | Note that `jjval` is the value that is used in the parsing phase by your interpreter/compiler. 139 | Another powerful thing you could do inside an action is to change the lexer's state. Take this example: 140 | 141 | ```[JavaScript] 142 | <>\/\* {this.pushState('BLOCKCOMMENT');} 143 | \*\/ {this.popState();} 144 | (\n|\r|.) {} 145 | ``` 146 | 147 | When the lexer encounters a `/*` sequence, it will enter a BLOCKCOMMENT state because of the action `this.pushState('BLOCKCOMMENT');`. In this state, the only active rules art the ones in which the state list (the list inside angular brackets) contains the BLOCKCOMMENT identifier. So while the lexer is in BLOCKCOMMENT state, it whill ignore any character because of the rule `(\n|\r|.) {}` 148 | The only way to change the state is to encounter a `*/` sequence in which the action `this.popState();` while resume the state that was active before encountering the first `/*` sequence. 149 | The previous rules thus can be used to ignore block comments with a C-like syntax. 
150 | 151 | Here is a table of all the members of the generated lexer that are available for you inside the actions: 152 | 153 | | member | description | 154 | |--------|-------------| 155 | | jjtext | the text matched by the regex | 156 | | jjval | the value of the current token, by default the same as jjtext | 157 | | jjpos | the position of the current token inside the input string | 158 | | less(n)| this function can be called to push back n character into the input stream | 159 | | isEOF()| returns true if the input is at the end | 160 | 161 | Of courser the generated Lexer is a JavaScript object, so you can dynamically add any member or method you need in your actions. 162 | 163 | Using the Lexer 164 | --------------- 165 | After you generate a lexer, you create one using the constructor: 166 | 167 | ```[JavaScript] 168 | var MyLexer = require('mylexer'); //mylexer.js being the file generated by jacob 169 | var lexer = new MyLexer(); 170 | lexer.setInput('string to be parsed'); 171 | 172 | var firstToken = lexer.nextToken(); 173 | 174 | if(lexer.isEOF(firstToken)){ 175 | .... 176 | } 177 | ``` 178 | 179 | After setting the input, you call the ```nextToken()``` to make the lexer read the next token. 180 | Each call to ```nextToken()``` will yield a new token, or a special object meaning you reached the end of the input. 
A non-EOF token looks like this: 181 | ```[JavaScript] 182 | { 183 | name: "integer", //this is the name given to the token by the action 184 | value: 12, //the action parsed the input into a number 185 | lexeme: "12", //the original section of the input corresponding to this token 186 | position: 35, //the position in the input string at which the token started 187 | pos: { //the position using lines and columns (useful for reporting parsing errors) 188 | col: 2, 189 | line: 7 190 | } 191 | 192 | } 193 | ``` 194 | 195 | At the end of the input, the lexer will return an special object to signal that we reached the end, to test if a token is the EOF (end of file) token, use ```lexer.isEOF(token)```. 196 | 197 | Usually you don't use the lexer by itself, of course, but you pass it over to a parser. 198 | 199 | 200 | Parser 201 | ====== 202 | 203 | In order to generate a parser you need to give Jacob the specification file containing an attributed grammar which describes the language you want to interpret/compile. Simply put, the grammar file will contains the grammar rules and the actions that the parser must execute after recognizing each rule. 204 | Jacob can generate **SLR**, **LALR** and **LR1** parser type. If not specified, Jacob will choose the most appropiate parser type given the grammar. 
205 | 206 | Here is an example of a jacob grammar file: 207 | 208 | 209 | ```[Javascript] 210 | %moduleName MyParser 211 | 212 | %left 'PLUS' '-' 213 | %left '*' '/' 214 | 215 | Program = { Statement } function(){}; 216 | 217 | Statement = 'id' '=' Expression function(id,_, exp){this[id] = exp;} 218 | | 'print' Expression function(_,exp){ console.log(exp);} ; 219 | 220 | Expression = Expression 'PLUS' Expression function (e1, _, e2) { 221 | return e1 + e2; 222 | } 223 | | Expression '-' Expression function (e1, _, e2) { 224 | return e1 - e2; 225 | } 226 | | Expression '*' Expression function (e1, _, e2) { 227 | return e1 * e2; 228 | } 229 | | Expression '/' Expression function (e1, _, e2) { 230 | return e1 / e2; 231 | } 232 | | 'integer' function (i) { 233 | return i; 234 | } 235 | | 'id' function (id) { 236 | return this[id]; 237 | } 238 | | '(' Expression ')' function (_, e) { 239 | return e; 240 | } 241 | 242 | ; 243 | 244 | ``` 245 | 246 | Directives 247 | ---------- 248 | At the top of the file you define directives, those can be: 249 | 250 | `%moduleName ` sets the name of the generated module 251 | 252 | `%mode SLR|LALR|LR1` sets the type of the generated parser. If not provided the simplest type able to parse the grammar is used. 253 | 254 | `%left|%right token1 [token2 token3...]` sets the precedence and the associativity of an operator. The operator defined first have lower precedence. The name used for the tokens should be the ones that the lexer is returning in their actions. They could be the actual input character (es: '-', '°') or an actual name (es: 'PLUS') the important thing is that they match what the lexer is returning. 255 | 256 | `%nonassoc` tells the parser that that token is not associative, so that it will raise an error whenever it will be used is an expression with other operator of the same precedence. 
257 | 258 | EBNF 259 | ---- 260 | Tha actual grammar is specified in Extended Backus–Naur Form, with every rule followed by an action consisting in a javascript function. 261 | 262 | The EBNF in the example defines rules using Nonterminal symbols (Program, Statement, Expression, ...) and terminal symbols ('(', ')', 'integer', '*',...). Terminal symbols are contained in single quotes and should match the name of the tokens as yielded by the lexer. 263 | 264 | Each production can have several alternatives (separated by the pipe symbol) and each alternative can have its own action function. The action function will receive a parameter for each element of the corresponding right-hand-side part of the production. 265 | 266 | Each rule is then terminated with a semicolon (;). 267 | 268 | EBNF is more handier than BNF because it also adds shortcuts to define repetitions, optionals and grouping: 269 | 270 | `{ ... }` means 0 or more (...) 271 | 272 | `[ ... ]` means 0 or one (...) 273 | 274 | `( ... )` will group the content into one group. This is useful to inline some rules that don't need a special action for themselves, for example: 275 | 276 | `Assignment = Identifier ':=' ( 'integer' | Identifier | 'string' ) function(id,_,rhsvalue) { ... };` 277 | 278 | Using The Parser 279 | ---------------- 280 | 281 | Like with the Lexer, you create the parser using its contstructor 282 | 283 | ```[JavaScript] 284 | var MyParser = require('./myparser'); //myparser.js is the file generated by jacob 285 | var parser = new MyParser(); 286 | ``` 287 | To start the parsing, you call the ```parse()``` method, passing a lexer as the first parameter: 288 | 289 | 290 | ```[JavaScript] 291 | var MyLexer = require('mylexer'); //mylexer.js being the file generated by jacob 292 | var lexer = new MyLexer(); 293 | parser.Parse(lexer); 294 | ``` 295 | 296 | 297 | What the parse() method do and yields depends entirely of what you put inside the grammar actions. 
If writing a simple expression interpreter, for example, it could yield the final result. If writing a compiler it could yield the source code or, even better, an Abstract Syntax Tree. 298 | 299 | Usually, though, for achieving non-trivial results, you must integrate the grammar actions with the outside world, through the use of an *execution context* and an *environment*. 300 | 301 | Execution Context 302 | ----------------- 303 | When the grammar actions are evaluated, their *this* refers to an object that acts as an execution context. This can be used to store state, identifier tables, and so on. 304 | You can pass your own object to act as execution context in the ```parse()``` method: 305 | 306 | ```[JavaScript] 307 | var cxt = {}; //this object can be whatever you need. Grammar actions will be evaluated in this object's context 308 | 309 | parser.parse(lexer, cxt); 310 | //cxt will now contain whatever the grammar actions put there 311 | ``` 312 | If not provided, the parser will create an empty object to be used as execution context for the actions. 313 | 314 | Environment 315 | ----------- 316 | 317 | Inside the actions, you might need to use other modules, for example containing the classes of your AST. To make those modules accessible to your grammar actions during parsing time, you pass an environment object to the parser constructor: 318 | ```[JavaScript] 319 | var MyParser = require('myparser'); 320 | var astclasses = require('astclasses'); 321 | var othermodule = require('someothermodule'); 322 | 323 | var parser = new MyParser({ 324 | ast: astclasses, 325 | other: othermodule 326 | }); 327 | ``` 328 | 329 | The actions of your grammar can reach the environment modules using any of the following names: 330 | *environment, env, modules, imports*. 
For example: 331 | 332 | ```[JavaScript] 333 | Statement = 'id' '=' Expression function(id,_, exp){ 334 | return new imports.ast.Assignment(id,exp); 335 | } 336 | ``` 337 | 338 | 339 | 340 | 341 | -------------------------------------------------------------------------------- /test/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Tests Index 6 | 7 | 8 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /test/lexer_compiled_tests.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Lexer Compiled Tests 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 |
23 |
24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /test/lexer_compiled_tests.js: -------------------------------------------------------------------------------- 1 | var chai = chai || require('chai'); 2 | var lexer = lexer || require('../lib/lexer'); 3 | var StringReader = StringReader || require('../lib/stringreader'); 4 | var expect = chai.expect; 5 | 6 | var basicTokens = { 7 | definitions: { 8 | "digits": "[0-9]" 9 | }, 10 | tokens: [ 11 | {'regexp': '\\/\\*', action: function(){this.pushState('COMMENT');}},//0 12 | {'regexp': '\\*\\/', action: function(){this.popState();}, state:'COMMENT'},//1 13 | 14 | {'regexp': '{digits}*\\.{digits}+', action: function(){this.jjval=parseFloat(this.jjtext); return 'float';}},//2 15 | { "regexp": '{digits}+', action: function(){this.jjval=parseInt(this.jjtext); return 'integer';}},//3 16 | { 'regexp': 'if', action: function(){return 'IF';}},//4 17 | { "regexp": 'zx*/xy*', action: function(){return 'zx*='+this.jjtext;}},//5,6,7 18 | { 'regexp': '(a|ab)/ba', action: function(){return '(a|ab)='+this.jjtext;}},//8 19 | { "regexp": 'ac/b*', action: function(){return 'ac='+this.jjtext;}},//9 20 | { "regexp": '^foo', action: function(){return 'at BOL '+this.jjtext;}},//10 21 | { "regexp": 'foo$', action: function(){return 'at EOL '+this.jjtext;}},//11 22 | { "regexp": '\\(', action: function(){return 'LPAR';}},//12 23 | { "regexp": '\\(\\d+\\)', action: function(){return 'EXPR';}},//13 24 | { 'regexp': '\\w+', action: function(){return 'ident';}},//14 25 | { 'regexp': '\\s*', action: function(){}},//15 26 | { 'regexp': '[^\\w\\s/]+', action: function(){return 'notword_or_space';}},//16 27 | { 'regexp': '.', action: function(){return this.jjtext;}},//17 28 | {'regexp': '(\\n|\\r|.)', action: function(){}, state:'COMMENT'},//18 29 | { 'regexp': '$', action: function(){console.log('end of file');}}//19 30 | ], 31 | moduleName: 'MyLexer' 32 | }; 33 | 34 | function 
compileLexer(str){ 35 | var lexersrc = lexer.generateLexer(basicTokens); 36 | eval(lexersrc); 37 | var lexer1 = new MyLexer().setInput(str); 38 | return lexer1; 39 | } 40 | 41 | describe("lex.Lexer",function() { 42 | describe('nextToken', function(){ 43 | it('should resolve definitions', function(){ 44 | var lexer = compileLexer('321.02') 45 | var token = lexer.nextToken(); 46 | expect(token.name).to.equal('float'); 47 | expect(token.value).to.equal(321.02); 48 | }); 49 | it('should respect precedences', function(){ 50 | var lexer = compileLexer('if iframe'); 51 | var token = lexer.nextToken(); 52 | expect(token.name).to.equal('IF'); 53 | expect(token.value).to.equal('if'); 54 | token = lexer.nextToken(); 55 | expect(token.name).to.equal('ident'); 56 | expect(token.value).to.equal('iframe'); 57 | }); 58 | it('should rollback', function(){ 59 | var lexer = compileLexer('(123456'); 60 | var token = lexer.nextToken(); 61 | expect(token.name).to.equal('LPAR'); 62 | token = lexer.nextToken(); 63 | expect(token.name).to.equal('integer'); 64 | expect(lexer.jjval).to.equal(123456); 65 | lexer.setInput(new StringReader('(23)')); 66 | token = lexer.nextToken(); 67 | expect(token.name).to.equal('EXPR'); 68 | 69 | }); 70 | it('should ignore tokens without returns', function(){ 71 | var lexer = compileLexer('foo \r\n 42'); 72 | var token = lexer.nextToken(); 73 | token = lexer.nextToken(); 74 | expect(token.name).to.equal('integer'); 75 | expect(token.value).to.equal(42); 76 | }); 77 | 78 | it('should return "EOF" at end of file', function(){ 79 | var lexer = compileLexer('foo'); 80 | var token = lexer.nextToken(); 81 | token = lexer.nextToken(); 82 | expect(lexer.isEOF(token)).to.be.true; 83 | //expect(token.value).to.equal('42'); 84 | }); 85 | 86 | it('should implement negate ranges', function(){ 87 | var lexer = compileLexer('<< 32'); 88 | var token = lexer.nextToken(); 89 | expect(token.name).to.equal('notword_or_space'); 90 | expect(token.value).to.equal('<<'); 91 | }); 92 
| 93 | it('should use states', function(){ 94 | var lexer = compileLexer('foo /* start comment *****\r\n** ecc ecc ***\r\n ecc ecc */ 42'); 95 | var token = lexer.nextToken(); 96 | token = lexer.nextToken(); 97 | expect(token.name).to.equal('integer'); 98 | expect(token.value).to.equal(42); 99 | }); 100 | 101 | it('should use lookaheads', function(){ 102 | var lexer = compileLexer('zxxxy aba ac'); 103 | var token = lexer.nextToken(); 104 | expect(token.name).to.equal('zx*=zxx'); 105 | token = lexer.nextToken(); 106 | expect(token.name).to.equal('ident'); 107 | token = lexer.nextToken(); 108 | expect(token.name).to.equal('(a|ab)=a'); 109 | token = lexer.nextToken(); 110 | expect(token.name).to.equal('ident'); 111 | token = lexer.nextToken(); 112 | expect(token.name).to.equal('ac=ac'); 113 | token = lexer.nextToken(); 114 | expect(lexer.isEOF(token)).to.be.true; 115 | }); 116 | 117 | it('should suport EOL', function(){ 118 | var lexer = compileLexer(' foo \r\n foo\r\n'); 119 | var token = lexer.nextToken(); 120 | expect(token.name).to.equal('ident'); 121 | token = lexer.nextToken(); 122 | expect(token.name).to.equal('at EOL foo'); 123 | 124 | }); 125 | 126 | 127 | 128 | it('should suport BOL', function(){ 129 | var lexer = compileLexer('foo foo \r\nfoo foo\r\n'); 130 | var token = lexer.nextToken(); 131 | expect(token.name).to.equal('at BOL foo'); 132 | token = lexer.nextToken(); 133 | expect(token.name).to.equal('ident'); 134 | token = lexer.nextToken(); 135 | expect(token.name).to.equal('at BOL foo'); 136 | token = lexer.nextToken(); 137 | expect(token.name).to.equal('at EOL foo'); 138 | 139 | 140 | }); 141 | 142 | }); 143 | 144 | 145 | }); -------------------------------------------------------------------------------- /test/lexer_dynamic_tests.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Lexer Dyamic Tests 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 |
23 |
24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /test/lexer_dynamic_tests.js: -------------------------------------------------------------------------------- 1 | var chai = chai || require('chai'); 2 | var lexer = lexer || require('../lib/lexer'); 3 | var StringReader = StringReader || require('../lib/stringreader'); 4 | var expect = chai.expect; 5 | 6 | var basicTokens = { 7 | definitions: { 8 | "digits": "[0-9]" 9 | }, 10 | tokens: [ 11 | {'regexp': '\\/\\*', action: function(){this.pushState('COMMENT');}},//0 12 | {'regexp': '\\*\\/', action: function(){this.popState();}, state:'COMMENT'},//1 13 | 14 | {'regexp': '{digits}*\\.{digits}+', action: function(){this.jjval=parseFloat(this.jjtext); return 'float';}},//2 15 | { "regexp": '{digits}+', action: function(){this.jjval=parseInt(this.jjtext); return 'integer';}},//3 16 | { 'regexp': 'if', action: function(){return 'IF';}},//4 17 | { "regexp": 'zx*/xy*', action: function(){return 'zx*='+this.jjtext;}},//5,6,7 18 | { 'regexp': '(a|ab)/ba', action: function(){return '(a|ab)='+this.jjtext;}},//8 19 | { "regexp": 'ac/b*', action: function(){return 'ac='+this.jjtext;}},//9 20 | { "regexp": '^foo', action: function(){return 'at BOL '+this.jjtext;}},//10 21 | { "regexp": 'foo$', action: function(){return 'at EOL '+this.jjtext;}},//11 22 | { "regexp": '\\(', action: function(){return 'LPAR';}},//12 23 | { "regexp": '\\(\\d+\\)', action: function(){return 'EXPR';}},//13 24 | { 'regexp': '\\w+', action: function(){return 'ident';}},//14 25 | { 'regexp': '\\s*', action: function(){}},//15 26 | { 'regexp': '[^\\w\\s/]+', action: function(){return 'notword_or_space';}},//16 27 | 28 | { 'regexp': '.', action: function(){return this.jjtext;}},//17 29 | {'regexp': '(\\n|\\r|.)', action: function(){}, state:'COMMENT'},//18 30 | { 'regexp': '$', action: function(){console.log('end of file');}}//19 31 | ] 32 | }; 33 | 34 | describe("lex.Lexer",function() { 35 | 
describe('nextToken', function(){ 36 | 37 | it('should parse abcab correctly', function(){ 38 | 39 | var tokens = { 40 | tokens: [ 41 | {regexp: 'abc', action: function(){console.log('abc=',this.jjtext);return this.jjtext;}}, 42 | {regexp: 'a', action: function(){console.log('a=',this.jjtext);return this.jjtext;}}, 43 | {regexp: 'b', action: function(){console.log('b=',this.jjtext);return this.jjtext;}} 44 | ] 45 | }; 46 | 47 | var lexer1 = new lexer.Lexer(tokens).setInput(new StringReader('abcab')); 48 | var token = lexer1.nextToken(); 49 | 50 | expect(token.value).to.equal('abc'); 51 | token = lexer1.nextToken(); 52 | expect(token.name).to.equal('a'); 53 | token = lexer1.nextToken(); 54 | expect(token.value).to.equal('b'); 55 | token = lexer1.nextToken(); 56 | expect(token.name).to.equal(undefined); 57 | }); 58 | 59 | 60 | it('should parse digits correctly', function(){ 61 | var lexer1 = new lexer.Lexer(basicTokens).setInput(new StringReader('3 + 4. ')); 62 | var token = lexer1.nextToken(); 63 | expect(token.name).to.equal('integer'); 64 | expect(token.value).to.equal(3); 65 | token = lexer1.nextToken(); 66 | expect(token.name).to.equal('notword_or_space'); 67 | token = lexer1.nextToken(); 68 | expect(token.name).to.equal('integer'); 69 | expect(token.value).to.equal(4); 70 | token = lexer1.nextToken(); 71 | expect(token.name).to.equal('notword_or_space'); 72 | }); 73 | 74 | it('should resolve definitions', function(){ 75 | var lexer1 = new lexer.Lexer(basicTokens).setInput(new StringReader('321.02')); 76 | var token = lexer1.nextToken(); 77 | expect(token.name).to.equal('float'); 78 | expect(token.value).to.equal(321.02); 79 | }); 80 | it('should respect precedences', function(){ 81 | var lexer1 = new lexer.Lexer(basicTokens).setInput(new StringReader('if iframe')); 82 | var token = lexer1.nextToken(); 83 | expect(token.name).to.equal('IF'); 84 | expect(token.value).to.equal('if'); 85 | token = lexer1.nextToken(); 86 | expect(token.name).to.equal('ident'); 87 | 
expect(token.value).to.equal('iframe'); 88 | }); 89 | 90 | it('should rollback', function(){ 91 | var lexer1 = new lexer.Lexer(basicTokens).setInput(new StringReader('(123456')); 92 | var token = lexer1.nextToken(); 93 | expect(token.name).to.equal('LPAR'); 94 | token = lexer1.nextToken(); 95 | expect(token.name).to.equal('integer'); 96 | expect(lexer1.jjval).to.equal(123456); 97 | lexer1.setInput(new StringReader('(23)')); 98 | token = lexer1.nextToken(); 99 | expect(token.name).to.equal('EXPR'); 100 | 101 | }); 102 | 103 | it('should ignore tokens without returns', function(){ 104 | var lexer1 = new lexer.Lexer(basicTokens).setInput(new StringReader('foo \r\n 42')); 105 | var token = lexer1.nextToken(); 106 | token = lexer1.nextToken(); 107 | expect(token.name).to.equal('integer'); 108 | expect(token.value).to.equal(42); 109 | }); 110 | 111 | it('should return "EOF" at end of file', function(){ 112 | var lexer1 = new lexer.Lexer(basicTokens).setInput(new StringReader('foo')); 113 | var token = lexer1.nextToken(); 114 | token = lexer1.nextToken(); 115 | expect(token).to.equal(lexer.EOF); 116 | //expect(lexer.jjtext).to.equal('42'); 117 | }); 118 | 119 | it('should implement negate ranges', function(){ 120 | var lexer1 = new lexer.Lexer(basicTokens).setInput(new StringReader('<< 32')); 121 | var token = lexer1.nextToken(); 122 | expect(token.name).to.equal('notword_or_space'); 123 | expect(token.value).to.equal('<<'); 124 | }); 125 | 126 | it('should use states', function(){ 127 | var lexer1 = new lexer.Lexer(basicTokens).setInput(new StringReader('foo /* start comment *****\r\n** ecc ecc ***\r\n ecc ecc */ 42')); 128 | var token = lexer1.nextToken(); 129 | token = lexer1.nextToken(); 130 | expect(token.name).to.equal('integer'); 131 | expect(token.value).to.equal(42); 132 | }); 133 | 134 | it('should use lookaheads', function(){ 135 | var lexer1 = new lexer.Lexer(basicTokens).setInput(new StringReader('zxxxy aba ac')); 136 | var token = lexer1.nextToken(); 137 | 
expect(token.name).to.equal('zx*=zxx'); 138 | token = lexer1.nextToken(); 139 | expect(token.name).to.equal('ident'); 140 | token = lexer1.nextToken(); 141 | expect(token.name).to.equal('(a|ab)=a'); 142 | token = lexer1.nextToken(); 143 | expect(token.name).to.equal('ident'); 144 | token = lexer1.nextToken(); 145 | expect(token.name).to.equal('ac=ac'); 146 | token = lexer1.nextToken(); 147 | expect(token).to.equal(lexer.EOF); 148 | }); 149 | 150 | it('should suport EOL', function(){ 151 | var lexer1 = new lexer.Lexer(basicTokens).setInput(new StringReader(' foo \r\n foo\r\n')); 152 | var token = lexer1.nextToken(); 153 | expect(token.name).to.equal('ident'); 154 | token = lexer1.nextToken(); 155 | expect(token.name).to.equal('at EOL foo'); 156 | 157 | }); 158 | 159 | it('should suport BOL', function(){ 160 | var lexer1 = new lexer.Lexer(basicTokens).setInput('foo foo \r\nfoo foo\r\n'); 161 | var token = lexer1.nextToken(); 162 | expect(token.name).to.equal('at BOL foo'); 163 | token = lexer1.nextToken(); 164 | expect(token.name).to.equal('ident'); 165 | token = lexer1.nextToken(); 166 | expect(token.name).to.equal('at BOL foo'); 167 | token = lexer1.nextToken(); 168 | expect(token.name).to.equal('at EOL foo'); 169 | 170 | 171 | }); 172 | }); 173 | 174 | 175 | }); -------------------------------------------------------------------------------- /test/parser_compiled_tests.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Parser Dynamic Tests 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 |
24 |
25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /test/parser_compiled_tests.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by gcannata on 17/08/2014. 3 | */ 4 | var chai = chai || require('chai'); 5 | var lexer = lexer || require('../lib/lexer'); 6 | var parser = parser || require('../lib/parser'); 7 | var StringReader = StringReader || require('../lib/stringreader'); 8 | var expect = chai.expect; 9 | 10 | var tokenspecs = { 11 | definitions: { 12 | "digits": "[0-9]" 13 | }, 14 | tokens: [ 15 | {'regexp': '{digits}*\\.{digits}+', action: function(){this.jjval=parseFloat(this.jjtext); return 'float';}}, 16 | { "regexp": '{digits}+', action: function(){this.jjval=parseInt(this.jjtext); return 'integer';}}, 17 | { 'regexp': 'if', action: function(){return 'IF';}}, 18 | { 'regexp': '\\w+', action: function(){return this.jjtext;}}, //or return 'ident' 19 | { 'regexp': '\\s*', action: function(){console.log('ignore spaces');}}, 20 | { 'regexp': '.', action: function(){return this.jjtext;}}, 21 | { 'regexp': '<>', action: function(){console.log('end of file');return 'EOF';}} 22 | ], 23 | moduleName: 'MyLexer' 24 | }; 25 | 26 | /* 27 | E->E+T | T 28 | T->T*F | F 29 | F->( E ) | id 30 | */ 31 | var ExpGrammar = { 32 | tokens: ['integer','+','*','(',')'], 33 | 34 | productions:[ 35 | ['E',['E','+','T'],function(e,_,t){ 36 | return '('+e+'+'+t+')'; 37 | }], 38 | ['E',['T'],function(t){ 39 | return t; 40 | }], 41 | ['T',['T','*','F'],function(t,_,f){ 42 | return '('+t+'*'+f+')'; 43 | }], 44 | ['T',['F'],function(f){ 45 | return f; 46 | }], 47 | ['F',['(','E',')'],function(e){ 48 | return '('+e+')'; 49 | }], 50 | ['F',['integer'],function(i){ 51 | return i.toString(); 52 | }] 53 | 54 | ], 55 | moduleName: 'MyParser' 56 | 57 | }; 58 | 59 | /* 60 | NON SLR1 61 | S -> L=R | R 62 | L -> *R | id 63 | R -> L 64 | 65 | es: *id=**id 66 | */ 67 | var NonSLR1Grammar = { 
68 | tokens: ['integer','=','*'], 69 | 70 | productions:[ 71 | ['S',['L','=','R'],function(s,_,r){ 72 | return '('+s+'='+r+')'; 73 | }], 74 | ['S',['R'],function(r){ 75 | return r; 76 | }], 77 | ['L',['*','R'],function(_,r){ 78 | return '('+'*'+r+')'; 79 | }], 80 | ['L',['integer'],function(i){ 81 | return i.toString(); 82 | }], 83 | ['R',['L'],function(l){ 84 | return l; 85 | }] 86 | 87 | ], 88 | moduleName: 'MyParser' 89 | 90 | }; 91 | 92 | /* NON LALR1 93 | S -> aEa | bEb | aFb | bFa 94 | E -> e 95 | F -> e 96 | */ 97 | var NonLALR1Grammar = { 98 | tokens: ['!','?','*'], 99 | 100 | productions:[ 101 | ['S',['!','E','!'],function(_,E){ 102 | return '(!'+E+'!)'; 103 | }], 104 | ['S',['?','E','?'],function(_,E){ 105 | return '(?'+E+'?)'; 106 | }], 107 | ['S',['!','F','?'],function(_,F){ 108 | return '(!'+F+'?)'; 109 | }], 110 | ['S',['?','F','!'],function(_,F){ 111 | return '(?'+F+'!)'; 112 | }], 113 | ['E',['*'],function(_,F){ 114 | return 'e*'; 115 | }], 116 | ['F',['*'],function(_,F){ 117 | return 'f*'; 118 | }] 119 | ], 120 | moduleName: 'MyParser' 121 | 122 | }; 123 | 124 | var AmbiguousGrammar = { 125 | tokens: ['integer','+','*','(',')'], 126 | operators:[ 127 | ['+','left',100], 128 | ['*','left',200] 129 | ], 130 | productions:[ 131 | ['E',['E','+','E'],function(e,_,t){ 132 | return '('+e+'+'+t+')'; 133 | }], 134 | ['E',['E','*','E'],function(e,_,t){ 135 | return '('+e+'*'+t+')'; 136 | }], 137 | ['E',['(','E',')'],function(_,e){ 138 | return '{'+e+'}'; 139 | }], 140 | ['E',['integer'],function(i){ 141 | return i.toString(); 142 | }] 143 | 144 | ], 145 | moduleName: 'MyParser' 146 | 147 | }; 148 | 149 | function compileLexer(str){ 150 | var lexersrc = lexer.generateLexer(tokenspecs); 151 | eval(lexersrc); 152 | var lexer1 = new MyLexer().setInput(str); 153 | return lexer1; 154 | } 155 | 156 | function compileParser(grammar, mode){ 157 | grammar.mode = mode; 158 | var parsersrc = parser.generateParser(grammar); 159 | eval(parsersrc); 160 | return new 
MyParser(); 161 | } 162 | 163 | describe("parser.Parser",function() { 164 | 165 | describe("SLR mode", function() { 166 | it('parses SLR grammar', function () { 167 | var lexer1 = compileLexer('2+3*4+5'); 168 | var p = compileParser(ExpGrammar, 'SLR'); 169 | var ret = p.parse(lexer1); 170 | expect(ret).to.be.equal('((2+(3*4))+5)'); 171 | }); 172 | 173 | it('fails on Non-SLR(1) grammar', function () { 174 | var lexer1 = compileLexer('*23=18'); 175 | var p; 176 | expect(function() { 177 | p = compileParser(NonSLR1Grammar, 'SLR') 178 | } 179 | ).to.throw(/Shift \/ Reduce conflict/); 180 | 181 | }); 182 | 183 | it('fails on Non-LALR(1) grammar', function () { 184 | var p; 185 | expect(function() { 186 | p = compileParser(NonLALR1Grammar, 'SLR'); 187 | } 188 | ).to.throw(/Reduce\/Reduce conflict/); 189 | 190 | }); 191 | 192 | it('parses Ambiguous grammar', function () { 193 | var lexer1 = compileLexer('2+3*4+5'); 194 | var p = compileParser(AmbiguousGrammar, 'SLR'); 195 | var ret = p.parse(lexer1); 196 | expect(ret).to.be.equal('((2+(3*4))+5)'); 197 | }); 198 | }); 199 | 200 | describe("LALR1 mode", function() { 201 | it('parses SLR grammar', function () { 202 | var lexer1 = compileLexer('2+3*4+5'); 203 | var p =compileParser(ExpGrammar, 'LALR1'); 204 | var ret = p.parse(lexer1); 205 | expect(ret).to.be.equal('((2+(3*4))+5)'); 206 | }); 207 | 208 | it('parses Non-SLR(1) grammar', function () { 209 | var lexer1 = compileLexer('*23=18'); 210 | var p = compileParser(NonSLR1Grammar, 'LALR1'); 211 | var ret = p.parse(lexer1); 212 | expect(ret).to.be.equal('((*23)=18)'); 213 | 214 | }); 215 | 216 | it('fails on Non-LALR(1) grammar', function () { 217 | var p; 218 | expect(function() { 219 | p = compileParser(NonLALR1Grammar, 'LALR1'); 220 | } 221 | ).to.throw(/Reduce\/Reduce conflict/); 222 | 223 | }); 224 | 225 | it('parses Ambiguous grammar', function () { 226 | var lexer1 = compileLexer('2+3*4+5'); 227 | var p = compileParser(AmbiguousGrammar, 'LALR1'); 228 | var ret = 
p.parse(lexer1); 229 | expect(ret).to.be.equal('((2+(3*4))+5)'); 230 | }); 231 | }); 232 | 233 | describe("LR1 mode", function() { 234 | it('parses SLR grammar', function () { 235 | var lexer1 = new lexer.Lexer(tokenspecs).setInput(new StringReader('2+3*4+5')); 236 | var p = compileParser(ExpGrammar, 'LR1'); 237 | var ret = p.parse(lexer1); 238 | expect(ret).to.be.equal('((2+(3*4))+5)'); 239 | }); 240 | 241 | it('parses Non-SLR(1) grammar', function () { 242 | var lexer1 = new lexer.Lexer(tokenspecs).setInput(new StringReader('*23=18')); 243 | var p = compileParser(NonSLR1Grammar, 'LR1') 244 | var ret = p.parse(lexer1); 245 | expect(ret).to.be.equal('((*23)=18)'); 246 | 247 | }); 248 | 249 | it('parses Non-LALR(1) grammar', function () { 250 | var lexer1 = new lexer.Lexer(tokenspecs).setInput(new StringReader('!*?')); 251 | var p = compileParser(NonLALR1Grammar, 'LR1') 252 | var ret = p.parse(lexer1); 253 | expect(ret).to.be.equal('(!f*?)'); 254 | 255 | }); 256 | 257 | it('parses Ambiguous grammar', function () { 258 | var lexer1 = compileLexer('2+3*4+5'); 259 | var p = compileParser(AmbiguousGrammar, 'LR1'); 260 | var ret = p.parse(lexer1); 261 | expect(ret).to.be.equal('((2+(3*4))+5)'); 262 | }); 263 | 264 | it('select the correct mode for the grammar', function () { 265 | var lexer1 = compileLexer('!*?'); 266 | var p = compileParser(NonLALR1Grammar,undefined); 267 | var ret = p.parse(lexer1); 268 | expect(ret).to.be.equal('(!f*?)'); 269 | 270 | }); 271 | }); 272 | 273 | }); -------------------------------------------------------------------------------- /test/parser_dynamic_tests.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Parser Dynamic Tests 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 |
24 |
25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /test/parser_dynamic_tests.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by gcannata on 17/08/2014. 3 | */ 4 | var chai = chai || require('chai'); 5 | var lexer = lexer || require('../lib/lexer'); 6 | var parser = parser || require('../lib/parser'); 7 | var StringReader = StringReader || require('../lib/stringreader'); 8 | var expect = chai.expect; 9 | 10 | var tokenspecs = { 11 | definitions: { 12 | "digits": "[0-9]" 13 | }, 14 | tokens: [ 15 | {'regexp': '{digits}*\\.{digits}+', action: function(){this.jjval=parseFloat(this.jjtext); return 'float';}}, 16 | { "regexp": '{digits}+', action: function(){this.jjval=parseInt(this.jjtext); return 'integer';}}, 17 | { 'regexp': 'if', action: function(){return 'IF';}}, 18 | { 'regexp': '\\w+', action: function(){return this.jjtext;}}, //or return 'ident' 19 | { 'regexp': '\\s*', action: function(){console.log('ignore spaces');}}, 20 | { 'regexp': '.', action: function(){return this.jjtext;}}, 21 | { 'regexp': '<>', action: function(){console.log('end of file');return 'EOF';}} 22 | ] 23 | }; 24 | 25 | /* 26 | E->E+T | T 27 | T->T*F | F 28 | F->( E ) | id 29 | */ 30 | var ExpGrammar = { 31 | tokens: ['integer','+','*','(',')'], 32 | 33 | productions:[ 34 | ['E',['E','+','T'],function(e,_,t){ 35 | return '('+e+'+'+t+')'; 36 | }], 37 | ['E',['T'],function(t){ 38 | return t; 39 | }], 40 | ['T',['T','*','F'],function(t,_,f){ 41 | return '('+t+'*'+f+')'; 42 | }], 43 | ['T',['F'],function(f){ 44 | return f; 45 | }], 46 | ['F',['(','E',')'],function(e){ 47 | return '('+e+')'; 48 | }], 49 | ['F',['integer'],function(i){ 50 | return i.toString(); 51 | }] 52 | 53 | ] 54 | 55 | }; 56 | 57 | /* 58 | NON SLR1 59 | S -> L=R | R 60 | L -> *R | id 61 | R -> L 62 | 63 | es: *id=**id 64 | */ 65 | var NonSLR1Grammar = { 66 | tokens: ['integer','=','*'], 67 | 68 | productions:[ 
69 | ['S',['L','=','R'],function(s,_,r){ 70 | return '('+s+'='+r+')'; 71 | }], 72 | ['S',['R'],function(r){ 73 | return r; 74 | }], 75 | ['L',['*','R'],function(_,r){ 76 | return '('+'*'+r+')'; 77 | }], 78 | ['L',['integer'],function(i){ 79 | return i.toString(); 80 | }], 81 | ['R',['L'],function(l){ 82 | return l; 83 | }] 84 | 85 | ] 86 | 87 | }; 88 | 89 | /* NON LALR1 90 | S -> aEa | bEb | aFb | bFa 91 | E -> e 92 | F -> e 93 | */ 94 | var NonLALR1Grammar = { 95 | tokens: ['!','?','*'], 96 | 97 | productions:[ 98 | ['S',['!','E','!'],function(_,E){ 99 | return '(!'+E+'!)'; 100 | }], 101 | ['S',['?','E','?'],function(_,E){ 102 | return '(?'+E+'?)'; 103 | }], 104 | ['S',['!','F','?'],function(_,F){ 105 | return '(!'+F+'?)'; 106 | }], 107 | ['S',['?','F','!'],function(_,F){ 108 | return '(?'+F+'!)'; 109 | }], 110 | ['E',['*'],function(_,F){ 111 | return 'e*'; 112 | }], 113 | ['F',['*'],function(_,F){ 114 | return 'f*'; 115 | }] 116 | ] 117 | 118 | }; 119 | var AmbiguousGrammar = { 120 | tokens: ['integer','+','*','(',')'], 121 | operators:[ 122 | ['+','left',100], 123 | ['*','left',200] 124 | ], 125 | productions:[ 126 | ['E',['E','+','E'],function(e,_,t){ 127 | return '('+e+'+'+t+')'; 128 | }], 129 | ['E',['E','*','E'],function(e,_,t){ 130 | return '('+e+'*'+t+')'; 131 | }], 132 | ['E',['(','E',')'],function(_,e){ 133 | return '{'+e+'}'; 134 | }], 135 | ['E',['integer'],function(i){ 136 | return i.toString(); 137 | }] 138 | 139 | ], 140 | parserName: 'MyParser' 141 | 142 | }; 143 | 144 | 145 | describe("parser.Parser",function() { 146 | 147 | describe("SLR mode", function() { 148 | it('parses SLR grammar', function () { 149 | var lexer1 = new lexer.Lexer(tokenspecs).setInput(new StringReader('2+3*4+5')); 150 | ExpGrammar.mode = 'SLR'; 151 | var p = new parser.Parser(ExpGrammar); 152 | var ret = p.parse(lexer1); 153 | expect(ret).to.be.equal('((2+(3*4))+5)'); 154 | }); 155 | 156 | it('fails on Non-SLR(1) grammar', function () { 157 | //var lexer1 = new 
lexer.Lexer(tokenspecs).setInput(new StringReader('2+3*4+5')); 158 | var p; 159 | NonSLR1Grammar.mode = 'SLR'; 160 | expect(function() { 161 | p = new parser.Parser(NonSLR1Grammar) 162 | } 163 | ).to.throw(/Shift \/ Reduce conflict/); 164 | 165 | }); 166 | 167 | it('fails on Non-LALR(1) grammar', function () { 168 | var p; 169 | NonLALR1Grammar.mode = 'SLR'; 170 | expect(function() { 171 | p = new parser.Parser(NonLALR1Grammar); 172 | } 173 | ).to.throw(/Reduce\/Reduce conflict/); 174 | 175 | }); 176 | }); 177 | 178 | describe("LALR1 mode", function() { 179 | it('parses SLR grammar', function () { 180 | var lexer1 = new lexer.Lexer(tokenspecs).setInput(new StringReader('2+3*4+5')); 181 | ExpGrammar.mode = 'LALR1'; 182 | var p = new parser.Parser(ExpGrammar); 183 | var ret = p.parse(lexer1); 184 | expect(ret).to.be.equal('((2+(3*4))+5)'); 185 | }); 186 | 187 | it('parses Non-SLR(1) grammar', function () { 188 | var lexer1 = new lexer.Lexer(tokenspecs).setInput(new StringReader('*23=18')); 189 | NonSLR1Grammar.mode = 'LALR1'; 190 | var p = new parser.Parser(NonSLR1Grammar); 191 | var ret = p.parse(lexer1); 192 | expect(ret).to.be.equal('((*23)=18)'); 193 | 194 | }); 195 | 196 | it('fails on Non-LALR(1) grammar', function () { 197 | var p; 198 | NonLALR1Grammar.mode = 'LALR1'; 199 | expect(function() { 200 | P = new parser.Parser(NonLALR1Grammar); 201 | } 202 | ).to.throw(/Reduce\/Reduce conflict/); 203 | 204 | }); 205 | }); 206 | 207 | describe("LR1 mode", function() { 208 | it('parses SLR grammar', function () { 209 | var lexer1 = new lexer.Lexer(tokenspecs).setInput(new StringReader('2+3*4+5')); 210 | ExpGrammar.mode = 'LR1'; 211 | var p = new parser.Parser(ExpGrammar); 212 | var ret = p.parse(lexer1); 213 | expect(ret).to.be.equal('((2+(3*4))+5)'); 214 | }); 215 | 216 | it('parses Non-SLR(1) grammar', function () { 217 | var lexer1 = new lexer.Lexer(tokenspecs).setInput(new StringReader('*23=18')); 218 | NonSLR1Grammar.mode = 'LR1'; 219 | var p = new 
parser.Parser(NonSLR1Grammar); 220 | var ret = p.parse(lexer1); 221 | expect(ret).to.be.equal('((*23)=18)'); 222 | 223 | }); 224 | 225 | it('parses Non-LALR(1) grammar', function () { 226 | var lexer1 = new lexer.Lexer(tokenspecs).setInput(new StringReader('!*?')); 227 | NonLALR1Grammar.mode = 'LR1'; 228 | var p = new parser.Parser(NonLALR1Grammar); 229 | var ret = p.parse(lexer1); 230 | expect(ret).to.be.equal('(!f*?)'); 231 | 232 | }); 233 | 234 | it('parses Ambiguous grammar', function () { 235 | var lexer1 = new lexer.Lexer(tokenspecs).setInput(new StringReader('2+3*4+5')); 236 | AmbiguousGrammar.mode = 'SLR'; 237 | var p = new parser.Parser(AmbiguousGrammar); 238 | var ret = p.parse(lexer1); 239 | expect(ret).to.be.equal('((2+(3*4))+5)'); 240 | }); 241 | 242 | it('select the correct mode for the grammar', function () { 243 | var lexer1 = new lexer.Lexer(tokenspecs).setInput(new StringReader('!*?')); 244 | NonLALR1Grammar.mode = undefined; 245 | var p = new parser.Parser(NonLALR1Grammar); 246 | var ret = p.parse(lexer1); 247 | expect(ret).to.be.equal('(!f*?)'); 248 | 249 | }); 250 | }); 251 | 252 | }); -------------------------------------------------------------------------------- /test/regex_parsing_tests.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Regex Parsing Tests 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 |
23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /test/regex_parsing_tests.js: -------------------------------------------------------------------------------- 1 | var chai = chai || require('chai'); 2 | var regex = regex || require('../lib/regex'); 3 | var expect = chai.expect; 4 | 5 | 6 | describe("regex.RegExParser",function() { 7 | 8 | it('parses single characters', function(){ 9 | var res = regex.parseRegExp('ab3'); 10 | expect(res).to.be.a('object'); 11 | expect(res).to.be.instanceof(regex.Concat); 12 | var lengths = res.getMinMaxLength(); 13 | expect(lengths).to.have.property('min',3); 14 | expect(lengths).to.have.property('max',3); 15 | }); 16 | 17 | it('parses character escapes', function(){ 18 | var res = regex.parseRegExp('\\/\\*'); 19 | 20 | expect(res).to.be.a('object'); 21 | expect(res.matches('/*')).to.be.true; 22 | 23 | }); 24 | 25 | it('parses character class', function(){ 26 | var res = regex.parseRegExp('\\w'); 27 | var res2 = regex.parseRegExp('[a-zA-Z0-9_]'); 28 | expect(res).to.be.a('object'); 29 | expect(res.second).to.be.eql(res2); 30 | expect(res.matches('F')).to.be.true; 31 | expect(res.matches('%')).to.be.false; 32 | var lengths = res.getMinMaxLength(); 33 | expect(lengths).to.have.property('min',1); 34 | expect(lengths).to.have.property('max',1); 35 | }); 36 | 37 | it('parses negation', function(){ 38 | var re = regex.parseRegExp('[^\\}]*'); 39 | expect(re.matches('hello()')).to.be.true; 40 | }); 41 | 42 | it('parses simple range', function(){ 43 | var res = regex.parseRegExp('[a-z]'); 44 | expect(res).to.be.a('object'); 45 | expect(res).to.be.instanceof(regex.Concat); 46 | expect(res).to.have.deep.property('second.from','a'); 47 | expect(res).to.have.deep.property('second.to','z'); 48 | expect(res).to.have.deep.property('second.negate',false); 49 | expect(res).to.have.deep.property('second.character',res.second); 50 | var lengths = res.getMinMaxLength(); 51 | 
expect(lengths).to.have.property('min',1); 52 | expect(lengths).to.have.property('max',1); 53 | }); 54 | 55 | it('parses negated range', function(){ 56 | var res = regex.parseRegExp('[^a-z]'); 57 | expect(res).to.be.a('object'); 58 | expect(res).to.be.instanceof(regex.Concat); 59 | expect(res).to.have.deep.property('second.from','a'); 60 | expect(res).to.have.deep.property('second.to','z'); 61 | expect(res).to.have.deep.property('second.negate',true); 62 | expect(res).to.have.deep.property('second.character',res.second); 63 | var lengths = res.getMinMaxLength(); 64 | expect(lengths).to.have.property('min',1); 65 | expect(lengths).to.have.property('max',1); 66 | }); 67 | 68 | it('parses range with astclasses', function(){ 69 | var res = regex.parseRegExp('[\\w\\s]'); 70 | expect(res).to.be.a('object'); 71 | expect(res.second).to.be.instanceof(regex.CharactersRange); 72 | expect(res.second).to.have.deep.property('from','a'); 73 | expect(res.second).to.have.deep.property('to','z'); 74 | expect(res.second).to.have.deep.property('negate',false); 75 | expect(res.second).to.have.deep.property('character',res.second); 76 | 77 | expect(res.second.next).to.have.deep.property('from','A'); 78 | expect(res.second.next).to.have.deep.property('to','Z'); 79 | expect(res.second.next).to.have.deep.property('negate',false); 80 | expect(res.second.next).to.have.deep.property('character',res.second.next); 81 | expect(res.matches('$')).to.be.false; 82 | expect(res.matches('o')).to.be.true; 83 | }); 84 | 85 | it('parses negated complex range', function(){ 86 | var res = regex.parseRegExp('[^a-cf-hn-p]'); 87 | expect(res).to.be.a('object'); 88 | expect(res.second).to.be.instanceof(regex.CharactersRange); 89 | expect(res.second).to.have.deep.property('from','a'); 90 | expect(res.second).to.have.deep.property('to','c'); 91 | expect(res.second).to.have.deep.property('negate',true); 92 | expect(res.second).to.have.deep.property('character',res.second); 93 | 94 | 
expect(res.second.next).to.have.deep.property('from','f'); 95 | expect(res.second.next).to.have.deep.property('to','h'); 96 | expect(res.second.next).to.have.deep.property('negate',true); 97 | expect(res.second.next).to.have.deep.property('character',res.second.next); 98 | 99 | expect(res.second.next.next).to.have.deep.property('from','n'); 100 | expect(res.second.next.next).to.have.deep.property('to','p'); 101 | expect(res.second.next.next).to.have.deep.property('negate',true); 102 | expect(res.second.next.next).to.have.deep.property('character',res.second.next.next); 103 | 104 | expect(res.matches('l')).to.be.true; 105 | expect(res.matches('o')).to.be.false; 106 | }); 107 | 108 | it('parses negated range with astclasses', function(){ 109 | var res = regex.parseRegExp('[^\\w\\s]'); 110 | res = regex.parseRegExp('[^\\w\\s]'); 111 | expect(res).to.be.a('object'); 112 | expect(res.second).to.be.instanceof(regex.CharactersRange); 113 | expect(res.second).to.have.deep.property('from','a'); 114 | expect(res.second).to.have.deep.property('to','z'); 115 | expect(res.second).to.have.deep.property('negate',true); 116 | expect(res.second).to.have.deep.property('character',res.second); 117 | 118 | expect(res.second.next).to.have.deep.property('from','A'); 119 | expect(res.second.next).to.have.deep.property('to','Z'); 120 | expect(res.second.next).to.have.deep.property('negate',true); 121 | expect(res.second.next).to.have.deep.property('character',res.second.next); 122 | 123 | expect(res.second.next.next).to.have.deep.property('from','0'); 124 | expect(res.second.next.next).to.have.deep.property('to','9'); 125 | expect(res.second.next.next).to.have.deep.property('negate',true); 126 | expect(res.second.next.next).to.have.deep.property('character',res.second.next.next); 127 | expect(res.matches('$')).to.be.true; 128 | expect(res.matches(' ')).to.be.false; 129 | }); 130 | 131 | it('parses alternatives', function(){ 132 | var res = regex.parseRegExp('a|b|c'); 133 | 134 | 
expect(res).to.be.a('object'); 135 | 136 | 137 | expect(res.matches('a')).to.be.true; 138 | expect(res.matches('b')).to.be.true; 139 | expect(res.matches('c')).to.be.true; 140 | expect(res.matches('d')).to.be.false; 141 | var lengths = res.getMinMaxLength(); 142 | expect(lengths).to.have.property('min',1); 143 | expect(lengths).to.have.property('max',1); 144 | }); 145 | 146 | it('parses alternatives - 2', function(){ 147 | var res = regex.parseRegExp('a|bcd|ca'); 148 | 149 | expect(res).to.be.a('object'); 150 | 151 | 152 | expect(res.matches('a')).to.be.true; 153 | expect(res.matches('b')).to.be.false; 154 | expect(res.matches('c')).to.be.false; 155 | expect(res.matches('bcd')).to.be.true; 156 | var lengths = res.getMinMaxLength(); 157 | expect(lengths).to.have.property('min',1); 158 | expect(lengths).to.have.property('max',3); 159 | }); 160 | 161 | it('parses repetitions (*)', function(){ 162 | var res = regex.parseRegExp('a*b'); 163 | 164 | expect(res).to.be.a('object'); 165 | 166 | 167 | expect(res.matches('a')).to.be.false; 168 | expect(res.matches('b')).to.be.true; 169 | expect(res.matches('ab')).to.be.true; 170 | expect(res.matches('aba')).to.be.false; 171 | var lengths = res.getMinMaxLength(); 172 | expect(lengths).to.have.property('min',1); 173 | expect(lengths).to.have.property('max',Infinity); 174 | 175 | res = regex.parseRegExp('(ab)*cb*'); 176 | expect(res.matches('abbbcbbb')).to.be.false; 177 | expect(res.matches('abababcbbb')).to.be.true; 178 | }); 179 | 180 | it('parses repetitions - (+)', function(){ 181 | var res = regex.parseRegExp('a+b'); 182 | 183 | expect(res).to.be.a('object'); 184 | 185 | 186 | expect(res.matches('a')).to.be.false; 187 | expect(res.matches('b')).to.be.false; 188 | expect(res.matches('ab')).to.be.true; 189 | expect(res.matches('aba')).to.be.false; 190 | var lengths = res.getMinMaxLength(); 191 | expect(lengths).to.have.property('min',2); 192 | expect(lengths).to.have.property('max',Infinity); 193 | }); 194 | 195 | it('parses 
repetitions - (?)', function(){ 196 | var res = regex.parseRegExp('a?b'); 197 | 198 | expect(res).to.be.a('object'); 199 | 200 | 201 | expect(res.matches('a')).to.be.false; 202 | expect(res.matches('b')).to.be.true; 203 | expect(res.matches('ab')).to.be.true; 204 | expect(res.matches('aba')).to.be.false; 205 | var lengths = res.getMinMaxLength(); 206 | expect(lengths).to.have.property('min',1); 207 | expect(lengths).to.have.property('max',2); 208 | }); 209 | 210 | it('parses lookahead', function(){ 211 | var res = regex.parseRegExp('a*/ba'); 212 | expect(res.isLookAhead()).to.be.true; 213 | 214 | }); 215 | 216 | it('parses BOL', function(){ 217 | var res = regex.parseRegExp('^aa'); 218 | expect(res.bol).to.be.true; 219 | expect(res.matches('aa')).to.be.true; 220 | res = regex.parseRegExp('aa'); 221 | expect(res.bol).to.be.undefined; 222 | expect(res.matches('aa')).to.be.true; 223 | }); 224 | 225 | it('parses EOL', function(){ 226 | var res = regex.parseRegExp('aa$'); 227 | 228 | }); 229 | 230 | it('parses Intervals', function(){ 231 | var res = regex.parseRegExp('(ab){2,3}'); 232 | expect(res.matches('ab')).to.be.false; 233 | expect(res.matches('abab')).to.be.true; 234 | expect(res.matches('ababab')).to.be.true; 235 | expect(res.matches('abababab')).to.be.false; 236 | res = regex.parseRegExp('ac{,2}a'); 237 | expect(res.matches('aa')).to.be.true; 238 | expect(res.matches('acca')).to.be.true; 239 | expect(res.matches('accca')).to.be.false; 240 | res = regex.parseRegExp('ac{2,}a'); 241 | expect(res.matches('aa')).to.be.false; 242 | expect(res.matches('aca')).to.be.false; 243 | expect(res.matches('acca')).to.be.true; 244 | expect(res.matches('accca')).to.be.true; 245 | expect(res.matches('acccccccca')).to.be.true; 246 | res = regex.parseRegExp('ac{2}a'); 247 | expect(res.matches('aca')).to.be.false; 248 | expect(res.matches('acca')).to.be.true; 249 | expect(res.matches('accca')).to.be.false; 250 | }); 251 | 252 | }); 
-------------------------------------------------------------------------------- /test/testexamples.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by gcannata on 30/08/2014. 3 | */ 4 | //to build: jacob - t basictokens.jacoblex -l ./examples/mylexer.js -g expression.jacobgram -p ./examples/expint.js 5 | var chai = chai || require('chai'); 6 | var expect = chai.expect; 7 | 8 | describe("jacob examples",function() { 9 | it('Expression Interpreter with inline actions', function(){ 10 | 11 | var jacob = require('../index'); 12 | 13 | jacob.elaborateLexFile('./examples/basictokens.jacoblex','./examples/basiclexer.js'); 14 | jacob.elaborateGramFile('./examples/Expression/expression.jacobgram','./examples/Expression/expint.js'); 15 | 16 | var Lexer = require('../examples/basiclexer'); 17 | var Parser = require('../examples/Expression/expint'); 18 | var l = new Lexer().setInput('a = 2+3*4\r\na = a / 2\r\nprint a'); 19 | var p = new Parser(); 20 | var cxt = {}; 21 | var ret = p.parse(l,cxt); 22 | 23 | expect(cxt).to.have.property('a',7); 24 | }); 25 | 26 | it('Expression Interpreter with Abstract Syntax Tree', function(){ 27 | 28 | var jacob = require('../index'); 29 | 30 | jacob.elaborateLexFile('./examples/basictokens.jacoblex','./examples/basiclexer.js'); 31 | jacob.elaborateGramFile('./examples/Expression/expressionast.jacobgram','./examples/Expression/expast.js'); 32 | 33 | var Lexer = require('../examples/basiclexer'); 34 | var Parser = require('../examples/Expression/expast'); 35 | var astclasses = require('../examples/Expression/ExpAstClasses'); 36 | //we augment the ast with custom behaviour (who needs visitor pattern in javascript?) 
37 | astclasses = require('../examples/Expression/expeval'); 38 | var l = new Lexer().setInput('a = 2+3*4\r\na = a / 2\r\nprint a'); 39 | var p = new Parser(astclasses); 40 | 41 | var program = p.parse(l,{}); 42 | 43 | 44 | var cxt = {}; 45 | program.eval(cxt); 46 | expect(cxt).to.have.property('a',7); 47 | }); 48 | 49 | }); 50 | 51 | --------------------------------------------------------------------------------