├── LICENSE.txt ├── README.md ├── algorithms.js ├── assert.js ├── check.js ├── generate.js ├── index.js ├── package.json ├── parser.js ├── printers.js ├── test.js └── types.js /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Bakkot 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | CFGrammar-Tool 2 | ============== 3 | 4 | A JavaScript library for working with [context-free grammars](http://en.wikipedia.org/wiki/Context-free_grammar). It's also a node.js module (`npm install cfgrammar-tool`). 5 | 6 | Check out the [demo](http://bakkot.github.io/cfgrammar-tool/). 7 | 8 | Features 9 | -------- 10 | 11 | * Parsing. 
The implementation is [Earley's algorithm](http://en.wikipedia.org/wiki/Earley_parser), so arbitrary CFGs are supported without transformation. Optionally keep track of two parses or all parses, so as to catch ambiguity. Note that tracking all parses can take exponential or infinite time (though the latter possibility can be detected in advance). 12 | 13 | * Generation. Given a grammar, generate a string of length *n* in its language. All such strings are generated with non-zero probability, and if the grammar is unambiguous and does not contain nullable nonterminals then strings are generated uniformly at random. Requires *n*^2 preprocessing time, then linear time for each string. 14 | - Useful for automatic testing when QuickCheck and its ilk aren't generating sufficiently structured data. For example, `test.js` contains a CFG for CFGs, which was used to automatically test this very application. 15 | 16 | * Diagnostics and manipulation. Find/remove unreachable symbols, symbols which do not generate any string, nullable symbols, duplicate rules, unit productions (A -> B), etc. 
17 | 18 | 19 | Example 20 | ------- 21 | 22 | ```javascript 23 | var cfgtool = require('cfgrammar-tool'); 24 | var types = cfgtool.types; 25 | var parser = cfgtool.parser; 26 | var generatorFactory = cfgtool.generator; 27 | 28 | var Grammar = types.Grammar; 29 | var Rule = types.Rule; 30 | var T = types.T; 31 | var NT = types.NT; 32 | var exprGrammar = Grammar([ 33 | Rule('E', [NT('E'), T('+'), NT('T')]), 34 | Rule('E', [NT('T')]), 35 | Rule('T', [NT('T'), T('*'), NT('F')]), 36 | Rule('T', [NT('F')]), 37 | Rule('F', [T('('), NT('E'), T(')')]), 38 | Rule('F', [T('n')]) 39 | ]); 40 | 41 | parser.parse(exprGrammar, 'n*(n+n)').length > 0; // true 42 | parser.parse(exprGrammar, 'n(n+n)').length > 0; // false 43 | 44 | var generator = generatorFactory(exprGrammar); 45 | generator(21); // something like 'n*((n+(n)*n+n+n*n))*n' 46 | ``` 47 | 48 | TODO 49 | ---- 50 | 51 | * General code cleanup; this was mostly written in a couple of marathon sessions to try to get a tool based on it up, and the haste shows. Strict mode and linting, too. 52 | 53 | * Normal forms: put a grammar in [Chomsky normal form](http://en.wikipedia.org/wiki/Chomsky_normal_form), [Greibach normal form](http://en.wikipedia.org/wiki/Greibach_normal_form), or others. 54 | 55 | * Import and export: parse and produce [BNF](http://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form) and other representations of grammars. 56 | 57 | * Automatic tokenization. Currently all tokens are implicitly single-character strings, at least on the parsing end, which is often not what you want. 58 | 59 | * ~~[Port to a language with a proper type system](https://github.com/bakkot/cfgrammar)~~. 60 | 61 | * ~~[Put up a demo page on gh-pages.](http://bakkot.github.io/cfgrammar-tool/)~~ 62 | 63 | License 64 | ------- 65 | 66 | Licensed under the [MIT license](http://opensource.org/licenses/MIT). If you're making public or commercial use of this library, I encourage (but do not require) you to tell me about it! 
67 | -------------------------------------------------------------------------------- /algorithms.js: -------------------------------------------------------------------------------- 1 | var Rule = require('./types').Rule; 2 | var assert = require('./assert'); 3 | // pass in the Grammar constructor and its prototype will be modified to have various algorithms 4 | module.exports = function(Grammar) { 5 | 6 | 7 | // todo annotate almost-terminals, which are nonterminals which can only produce strings consisting only of terminals or strings consisting of terminals and nonterminals (other than itself) which are almost-terminal. 8 | // todo simplified and aggressive simplified. denulls, standardizes nonterminal names, standardizes rule ordering. aggressive simplified probably invokes a new 'stripped' fn: it reduces the number of rules by folding almost-terminal rules into the things which make them. obviously this can have exponential blowup. 9 | // possibly also try to remove redundant rules? 10 | 11 | 12 | 13 | // modify the grammar so each symbol has a 'nullable' property 14 | // and the grammar to have a 'nullables' property, a list of nullable symbols 15 | // returns the list of nullables 16 | // http://cstheory.stackexchange.com/a/2493 17 | Grammar.prototype.annotateNullables = function() { 18 | if(this.hasOwnProperty('nullables')) return this.nullables; // already done, don't redo 19 | 20 | this.nullables = []; 21 | var queue = []; 22 | var cs = []; // count of non-distinct symbols in RHS of rule i currently marked non-nullable, which does not make for a good variable name 23 | var rMap = this.getReverseMap(); 24 | 25 | for(var i=0; i 0) { 62 | var cur = queue.pop(); 63 | for(var i=0; i AB, B->'', A->A. then B is reachable.) 
84 | // grammar gets an "unreachables" property 85 | // returns the list of unreachables 86 | Grammar.prototype.annotateUnreachables = function() { 87 | if(this.hasOwnProperty('unreachables')) return this.unreachables; // already done, don't redo 88 | 89 | this.unreachables = []; 90 | var queue = [this.start]; 91 | 92 | for(var i=0; i 0) { 99 | var cur = queue.pop(); 100 | for(var j=0; j 0) { 160 | var cur = queue.pop(); 161 | for(var i=0; i A (via some chain of length > 0) 188 | // grammar gets a "selfDerivings" property 189 | // returns the list of self-deriving symbols 190 | // http://cs.stackexchange.com/a/40967/12130 191 | Grammar.prototype.annotateSelfDeriving = function() { 192 | if(this.hasOwnProperty('selfDerivings')) return this.selfDerivings; // already done, don't redo 193 | 194 | this.selfDerivings = []; 195 | 196 | this.annotateNullables(); 197 | 198 | var derives = {}; // derives.A.B holds if A *=> B 199 | for(var i=0; i B) removed. 343 | // does not modify the grammar. if the result is empty, returns {empty: true}. 344 | Grammar.prototype.strippedUnitProductions = function() { 345 | var newRules = []; 346 | 347 | var done = []; 348 | var queue = []; 349 | function seen(rule) { 350 | for(var i=0; iB 374 | enqueue(rule); 375 | } 376 | } 377 | 378 | while(queue.length > 0) { 379 | var rule = queue.pop(); 380 | done.push(rule); 381 | var sym = rule.production[0].data; // everything in the queue is a unit production 382 | if(sym !== rule.name) { // rule is not A->A, which can just be ignored 383 | for(var j=0; jwhatever 385 | var newRule = Rule(rule.name, origRule.production.slice(0)); // A->whatever 386 | if(newRule.production.length !==1 || newRule.production[0].type == 'T') { 387 | newRules.push(newRule); 388 | } 389 | else { 390 | enqueue(newRule); 391 | } 392 | } 393 | } 394 | } 395 | 396 | if(newRules.length == 0) { 397 | return {empty: true}; 398 | } 399 | 400 | return Grammar(newRules, this.start); // I'm... pretty sure this is correct. 
401 | } 402 | 403 | 404 | // returns a copy of the grammar with duplicate rules removed. 405 | // does not modify the grammar. 406 | Grammar.prototype.strippedDuplicates = function() { 407 | var newRules = []; 408 | for(var i=0; iB. does not modify the grammar, 426 | // except annotating. if the result is empty, returns {empty: true}. 427 | Grammar.prototype.stripped = function() { 428 | var newGrammar = this.strippedUnitProductions(); 429 | if(newGrammar.empty) return newGrammar; 430 | 431 | // useless, then unreachable. not the other way around. 432 | newGrammar = newGrammar.strippedUseless(); 433 | if(newGrammar.empty) return newGrammar; 434 | 435 | newGrammar = newGrammar.strippedUnreachable(); 436 | if(newGrammar.empty) return newGrammar; 437 | 438 | assert(newGrammar.annotateUseless().length == 0, 'Suddenly there are more useless symbols?'); 439 | 440 | newGrammar = newGrammar.strippedDuplicates(); 441 | return newGrammar; 442 | } 443 | 444 | 445 | 446 | // not exactly the world's most efficient implement, but whatever. 447 | // used in stripping nullables. 448 | function nthSubset(list, n) { 449 | var out = []; 450 | for(var i = 0, p = 1; p<=n; ++i, p<<=1) { 451 | if(p & n) { 452 | out.push(list[i]); 453 | } 454 | } 455 | return out; 456 | } 457 | 458 | 459 | // returns a copy of the grammar which recognizes the same language (except without the empty string) 460 | // does not modify the grammar. new grammar has a property 'makesEpsilon' which is true iff epsilon 461 | // was recognized by the original grammar. 
462 | // if the language is otherwise empty, returns {empty: true, makesEpsilon: [as appropriate]} 463 | Grammar.prototype.deNulled = function() { 464 | 465 | var newGrammar = this.stripped(); 466 | if(newGrammar.empty) { 467 | newGrammar.makesEpsilon = false; 468 | return newGrammar; 469 | } 470 | 471 | newGrammar.annotateNullables(); 472 | var makesEpsilon = newGrammar.symbolMap[newGrammar.start].nullable; 473 | newRules = []; 474 | for(var i=0; i 0) { 29 | throw Error('Generator does not work when there are infinitely many parses for a string. (ie, when A*=>A is possible.)'); 30 | } 31 | 32 | var rand = !deterministic ? Math.random : (function() { 33 | var seed = 0x2F6E2B1; 34 | return function() { 35 | // Robert Jenkins' 32 bit integer hash function. From Octane / V8. 36 | seed = ((seed + 0x7ED55D16) + (seed << 12)) & 0xFFFFFFFF; 37 | seed = ((seed ^ 0xC761C23C) ^ (seed >>> 19)) & 0xFFFFFFFF; 38 | seed = ((seed + 0x165667B1) + (seed << 5)) & 0xFFFFFFFF; 39 | seed = ((seed + 0xD3A2646C) ^ (seed << 9)) & 0xFFFFFFFF; 40 | seed = ((seed + 0xFD7046C5) + (seed << 3)) & 0xFFFFFFFF; 41 | seed = ((seed ^ 0xB55A4F09) ^ (seed >>> 16)) & 0xFFFFFFFF; 42 | return (seed & 0xFFFFFFF) / 0x10000000; 43 | }; 44 | }()); 45 | 46 | var ftable = {}; 47 | function f(sym, n) { 48 | if(!(sym in ftable)) { 49 | ftable[sym] = {}; 50 | } 51 | if(n in ftable[sym]) { 52 | return ftable[sym][n]; 53 | } 54 | 55 | var out = []; 56 | for(var j=0; j 0, index has been pushed along by a series of sub-parses completing, 57 | // each sub-parse representing a terminal or nonterminal in this rule's production. 58 | // backPointers is an array containing those completed sub-parses/States. 59 | // in particular, backPointers[i] is the State object corresponding to 60 | // rule.production[i] (or null if said production is a terminal). 
61 | // TODO rename backPointers, do away with index 62 | // TODO have 'c' instead of null for terminals in backPointers 63 | function State(rule, index, predecessor, backPointers) { 64 | if(!(this instanceof State)) return new State(rule, index, predecessor, backPointers); 65 | this.rule = rule; 66 | this.index = index; 67 | this.predecessor = predecessor; 68 | this.backPointers = backPointers || []; 69 | assert(this.index == this.backPointers.length); // honestly could just do away with index at this point 70 | } 71 | State.prototype.done = function(){ return this.index === this.rule.production.length; } 72 | State.prototype.compare = function(other) { 73 | if(this.rule === other.rule 74 | && this.index === other.index 75 | && this.predecessor === other.predecessor) { 76 | if(arraysEqual(this.backPointers, other.backPointers)) { 77 | return enums.IDENTICAL; 78 | } 79 | else { 80 | return enums.SIMILAR; 81 | } 82 | } 83 | else { 84 | return enums.DISTINCT; 85 | } 86 | } 87 | State.prototype.next = function(){ return this.rule.production[this.index]; } 88 | State.prototype.toString = function(){ 89 | return '(' + this.rule.name + ' -> ' + this.rule.production.slice(0, this.index).join('') 90 | + '*' + this.rule.production.slice(this.index).join('') + ', ' + this.predecessor.toString() + ')'; 91 | } 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | function parse(grammar, str, produceCount) { 100 | if(typeof str !== 'string') throw Error('Can\'t parse non-string object ' + (typeof str)); 101 | var oldProduceCount = parser.PRODUCECOUNT; 102 | if(produceCount) { 103 | parser.PRODUCECOUNT = produceCount; 104 | } 105 | 106 | var chart = []; 107 | for(var i=0; i<=str.length; ++i) chart.push([]); 108 | 109 | function seen(state, strPos) { 110 | var count = 0; 111 | for(var i=0; i 1) { // we've seen something similar and do care 117 | return true; 118 | } 119 | } 120 | return false; 121 | } 122 | 123 | function scanner(state, strPos) { 124 | if(state.next().equals(T(str[strPos]))) { 
125 | var newBPs = state.backPointers.slice(0); 126 | newBPs.push(null); // terminals do not need backpointers, of course 127 | var advanced = State(state.rule, state.index+1, state.predecessor, newBPs); 128 | if(!seen(advanced, strPos+1)) { 129 | chart[strPos+1].push(advanced); 130 | } 131 | } 132 | } 133 | 134 | function predictor(state, strPos) { 135 | var sym = state.next(); 136 | for(var i=0; iY will not generate an additional level in the AST. 72 | // discardImplicitTerminals: if a production contains both terminals and nonterminals, children does not contain the terminals. 73 | // ruleRenamingFunction should be a function from Rules in the grammar to names of rules (e.g. strings), which will then be used as the 'type' of nodes. If not present, 'type' will be the Rule itself. 74 | // Non-terminals in the resulting AST have 'type' and 'children' properties, with 'children' being an array. Terminals have type 'Terminal' and a 'value' property containing their value. 75 | 76 | var rename = typeof ruleRenamingFunction === 'function'; 77 | 78 | function backPointerToSubtree(bp) { 79 | if (collapseUnitProductions && bp.backPointers.length === 1) { 80 | var child = bp.backPointers[0]; 81 | if (child === null) { 82 | return { 83 | type: 'Terminal', 84 | value: bp.rule.production[0].data 85 | }; 86 | } else { 87 | return backPointerToSubtree(child); 88 | } 89 | } 90 | var tree = { 91 | type: rename ? 
ruleRenamingFunction(bp.rule) : bp.rule, 92 | children: [] 93 | } 94 | var keepTerminals = !(discardImplicitTerminals && bp.backPointers.some(function(c){return c!== null;})); 95 | for (var i = 0; i= highlightLength) { 172 | c = o; 173 | } 174 | 175 | if(typeof str[i] === 'string') { 176 | c.appendChild(document.createTextNode(str[i])); 177 | } 178 | else { 179 | var sp = document.createElement('span'); 180 | sp.className = 'cfg-symbol'; 181 | sp.appendChild(document.createTextNode(str[i].rule.name)); 182 | c.appendChild(sp); 183 | } 184 | } 185 | return o; 186 | } 187 | 188 | var out = document.createElement('table'); 189 | out.className = 'cfg-derivations derivations'; // TODO second is for compat 190 | out.innerHTML = 'RuleApplicationResult'; 191 | 192 | 193 | // handle GAMMA state specially 194 | var row = document.createElement('tr'); 195 | var cell = document.createElement('td'); 196 | var sp = document.createElement('sp'); 197 | sp.className = 'cfg-rule'; 198 | sp.innerHTML = 'Start \u2192 ' + '' + parse.backPointers[0].rule.name + ''; 199 | cell.appendChild(sp); 200 | row.appendChild(cell); 201 | 202 | cell = document.createElement('td'); 203 | var sp = document.createElement('span'); 204 | sp.className = 'cfg-start'; 205 | sp.appendChild(document.createTextNode('Start')); 206 | cell.appendChild(sp); 207 | row.appendChild(cell); 208 | 209 | str = [parse.backPointers[0]]; // ie, start symbol 210 | cell = document.createElement('td'); 211 | cell.appendChild(formatIntermediateString(-1)); 212 | row.appendChild(cell); 213 | 214 | out.appendChild(row); 215 | 216 | 217 | for(var i = 0; i'; 271 | o.appendChild(line); 272 | o.appendChild(document.createElement('br')); 273 | 274 | for(var i=0; i 0) { 284 | line.appendChild(document.createTextNode(' | ')); 285 | } 286 | var rule = grammar.symbolMap[sym].rules[j]; 287 | line.appendChild(domProduction(rule.production)); 288 | } 289 | o.appendChild(line); 290 | o.appendChild(document.createElement('br')); 291 | } 292 | 
293 | return o; 294 | } 295 | 296 | 297 | module.exports = { 298 | subtreePrinter: subtreePrinter, 299 | rewritePrinter: rewritePrinter, 300 | astPrinter: astPrinter, 301 | domPrinter: domPrinter, 302 | domGrammarPrinter: domGrammarPrinter 303 | } -------------------------------------------------------------------------------- /test.js: -------------------------------------------------------------------------------- 1 | var types = require('./types'); 2 | NT = types.NT; 3 | T = types.T; 4 | Rule = types.Rule; 5 | Grammar = types.Grammar; 6 | var generator = require('./generate'); 7 | var checks = require('./check'); 8 | var assert = require('./assert'); 9 | var parser = require('./parser'); 10 | var subtreePrinter = require('./printers').subtreePrinter; 11 | var astPrinter = require('./printers').astPrinter; 12 | 13 | 14 | 15 | 16 | // Arithmetic expressions on 0-9 (with precedence). Demonstrates two ways to evaluate a parse. 17 | 18 | var plus = Rule('E', [NT('E'), T('+'), NT('T')]); 19 | var term = Rule('E', [NT('T')]); 20 | var times = Rule('T', [NT('T'), T('*'), NT('F')]); 21 | var factor = Rule('T', [NT('F')]); 22 | var pos = Rule('F', [NT('P')]); 23 | var neg = Rule('F', [T('-'), NT('P')]); // JS does not allow --1 24 | var paren = Rule('P', [T('('), NT('E'), T(')')]); 25 | var digit = Rule('P', [NT('N')]); 26 | 27 | var mathGrammar = Grammar([ 28 | plus, 29 | term, 30 | times, 31 | factor, 32 | pos, 33 | neg, 34 | paren, 35 | digit, 36 | Rule('N', [T('0')]), 37 | Rule('N', [T('1')]), 38 | Rule('N', [T('2')]), 39 | Rule('N', [T('3')]), 40 | Rule('N', [T('4')]), 41 | Rule('N', [T('5')]), 42 | Rule('N', [T('6')]), 43 | Rule('N', [T('7')]), 44 | Rule('N', [T('8')]), 45 | Rule('N', [T('9')])] 46 | ); 47 | 48 | // You can treat the parse tree as a very complex AST and evaluate directly, as follows: 49 | plus.eval = function(state) { return mathEval(state.backPointers[0]) + mathEval(state.backPointers[2]); } 50 | times.eval = function(state) { return 
mathEval(state.backPointers[0]) * mathEval(state.backPointers[2]); } 51 | neg.eval = function(state) { return -mathEval(state.backPointers[1]); } 52 | paren.eval = function(state) { return mathEval(state.backPointers[1]); } 53 | digit.eval = function(state) { return parseInt(state.backPointers[0].rule.production[0].data); } 54 | 55 | function mathEval(state) { 56 | if(state.rule.eval) { 57 | return state.rule.eval(state); 58 | } 59 | else { 60 | assert(state.rule.production.length == 1, 'No valid evaluation rule.'); 61 | return mathEval(state.backPointers[0]); 62 | } 63 | } 64 | 65 | // Or you can use the astPrinter to get a sane AST, and then evaluate that. 66 | function toMathAst(parse) { 67 | return astPrinter(parse, true, true, function(rule) { // the function is a map from rules to the name of the corresponding node 68 | switch(rule) { 69 | case plus: 70 | return 'Plus'; 71 | case times: 72 | return 'Times'; 73 | case neg: 74 | return 'Negation'; 75 | case paren: 76 | return 'Paren'; 77 | default: 78 | return 'Unknown'; 79 | } 80 | }); 81 | } 82 | 83 | function mathAstEval(ast) { 84 | switch(ast.type) { 85 | case 'Plus': 86 | return mathAstEval(ast.children[0]) + mathAstEval(ast.children[1]); 87 | case 'Times': 88 | return mathAstEval(ast.children[0]) * mathAstEval(ast.children[1]); 89 | case 'Negation': 90 | return -mathAstEval(ast.children[0]); 91 | case 'Paren': 92 | return mathAstEval(ast.children[0]); 93 | case 'Terminal': 94 | return +ast.value; 95 | } 96 | } 97 | 98 | 99 | var mathGenerator = generator(mathGrammar); 100 | 101 | console.log('Arithmetic tests:'); 102 | for(var i=0; i<10; ++i) { 103 | var list = i % 2 === 0; 104 | var expr = mathGenerator(Math.round(Math.random()*40) + 1, { list: list }); 105 | if (list) { 106 | assert(Array.isArray(expr)); 107 | expr = expr.join(''); 108 | } 109 | var res = parser.parse(mathGrammar, expr, parser.PRODUCEALL); 110 | assert(res.length == 1, 'mathGrammar is ambiguous?'); 111 | 112 | var grammarVal = 
mathEval(res[0]); 113 | var jsVal = eval(expr); 114 | assert(grammarVal === jsVal || (isNaN(grammarVal) && isNaN(jsVal)), 'JS disagrees with our evaluation.'); 115 | 116 | var ast = toMathAst(res[0]); 117 | var astVal = mathAstEval(ast); 118 | assert(jsVal === astVal || (isNaN(jsVal) && isNaN(astVal)), 'JS disagrees with the AST evaluation.'); 119 | } 120 | console.log('Passed.'); 121 | 122 | 123 | 124 | // The ur-test: generate and test CFGs. BECAUSE I CAN. 125 | // Specifically, for five-or-fewer-symbol CFGs over [x,y,z]. 126 | // Only to be used for generation, not parsing (because I don't want to split up the terminal strings) 127 | 128 | 129 | var grammarGrammar = Grammar([ 130 | Rule('Grammar', [T('Grammar([\n '), NT('Rule'), NT('RulesList'), T('\n]);')]), 131 | Rule('RulesList', [T(',\n '), NT('Rule'), NT('RulesList')]), 132 | Rule('RulesList', []), 133 | Rule('Rule', [T('Rule(\''), NT('NT'), T('\', ['), NT('OptionalSymList'), T('])')]), 134 | Rule('OptionalSymList', [NT('Sym'), NT('SymList')]), 135 | Rule('OptionalSymList', []), 136 | Rule('SymList', [T(', '), NT('Sym'), NT('SymList')]), 137 | Rule('SymList', []), 138 | Rule('Sym', [T('T(\''), NT('T'), T('\')')]), 139 | Rule('Sym', [T('NT(\''), NT('NT'), T('\')')]), 140 | Rule('T', [T('x')]), 141 | Rule('T', [T('y')]), 142 | Rule('T', [T('z')]), 143 | Rule('NT', [T('A')]), 144 | Rule('NT', [T('B')]), 145 | Rule('NT', [T('C')]), 146 | Rule('NT', [T('D')]), 147 | Rule('NT', [T('E')]) 148 | ]); 149 | 150 | var ggg = generator(grammarGrammar); 151 | 152 | function makeGrammar() { 153 | var x = ggg(Math.round(Math.random()*400) + 40); 154 | //console.log(x); 155 | return eval(x); // eval? yes. eval. 156 | } 157 | 158 | // Generate ten random context-free grammars, and ensure that the set of strings 159 | // each generates appears to be at least a subset of the set of strings each recognizes. 160 | // (Of course, the sets should be identical, but that's harder to test.) 
161 | console.log('CFG tests:'); 162 | for(var i=0; i<5; ++i) { 163 | //console.log(i); 164 | var g = makeGrammar(); 165 | var w = checks.locatableDifference(g, g, 4, 10); 166 | if(w) { 167 | console.log(w); 168 | process.exit(); 169 | } 170 | } 171 | console.log('Passed.'); 172 | 173 | -------------------------------------------------------------------------------- /types.js: -------------------------------------------------------------------------------- 1 | function Sym(type, data) { 2 | this.type = type; 3 | this.data = data; 4 | } 5 | Sym.prototype.equals = function(other) { 6 | return other.type === this.type && other.data === this.data; 7 | } 8 | Sym.prototype.toString = function(){ 9 | return this.data.toString(); //return this.type + '(' + this.data + ')'; 10 | } 11 | 12 | function NT(data) { return new Sym('NT', data); } 13 | function T(data) { return new Sym('T', data); } 14 | 15 | function reprEscape(str) { // does not handle unicode or exceptional cases properly. 16 | return str.replace(/['\\]/g, function(c) { return '\\' + c; }) 17 | .replace(/\n/g, '\\n').replace(/\r/g, '\\r'); 18 | } 19 | 20 | function Rule(name, production) { 21 | if(!(this instanceof Rule)) return new Rule(name, production); 22 | this.name = name; // LHS 23 | this.production = production; // RHS\ 24 | } 25 | Rule.prototype.equals = function(other) { 26 | if(other.name !== this.name) return false; 27 | if(other.production.length !== this.production.length) return false; 28 | 29 | for(var i=0; i ' + this.production.join(''); 36 | } 37 | Rule.prototype.repr = function() { 38 | var out = 'Rule(\'' + reprEscape(this.name) + '\', ['; 39 | for(var i=0; i0) out += ', '; 41 | out += this.production[i].type + '(\'' + reprEscape(this.production[i].data) + '\')'; 42 | } 43 | out += '])'; 44 | return out; 45 | } 46 | 47 | 48 | 49 | 50 | function Grammar(rules, start) { // if not given, start is LHS of the first rule. 
51 | if(!(this instanceof Grammar)) return new Grammar(rules, start); 52 | this.rules = rules; 53 | this.start = start || rules[0].name; // TODO warn 54 | this.symbolMap = {}; // initially just rules for each symbol; eventually can contain annotations like 'nullable' 55 | this.symbolsList = start?[start]:[]; 56 | 57 | if(start) this.symbolMap[start] = {rules: []}; 58 | 59 | for(var i=0; i0) out += ',\n '; 80 | out += this.rules[i].repr(); 81 | } 82 | out += '\n], \'' + reprEscape(this.start) + '\')'; 83 | return out; 84 | } 85 | 86 | 87 | // get a map from symbols to a list of the rules they appear in the RHS of 88 | // if a symbol appears in a RHS more than once, that rule will appear more than once in the list 89 | // modifies the grammar to have _reverseMap property, for caching 90 | Grammar.prototype.getReverseMap = function() { 91 | if(!this.hasOwnProperty('_reverseMap')) { 92 | this._reverseMap = {}; 93 | for(var i=0; i