├── .gitignore
├── LICENSE
├── README.md
├── package.json
└── src
    ├── ast.js
    ├── compile.js
    ├── grammar-mode.js
    ├── graph.js
    ├── matchexpr.js
    ├── mode.js
    └── parse.js


/.gitignore:
--------------------------------------------------------------------------------
1 | .tern-port
2 | /node_modules
3 | /dist
4 | /src/scratch


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (C) 2017 by Marijn Haverbeke <marijnh@gmail.com> and others
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # CodeMirror grammar mode
  2 | 
  3 | This is an experimental tool for building CodeMirror modes from
  4 | grammar descriptions.
  5 | 
  6 | You write a grammar like this:
  7 | 
  8 |     skip (" " | "\t" | "\n")* {
  9 |       Expr { (num | var | ParenExpr) (op Expr)? }
 10 |       context ParenExpr { "(" Expr ")" }
 11 |     }
 12 |     tokens {
 13 |       num="number" { digit+ }
 14 |       var="variable" { letter (letter | digit)* }
 15 |       op { "+" | "-" | "/" | "*" }
 16 |     }
 17 |     digit { "0"-"9" }
 18 |     letter { "a"-"z" | "A"-"Z" }
 19 | 
 20 | And then run `grammar-mode` on it to convert it into a JavaScript
 21 | file. This file will export a set of bindings that can be given to the
 22 | accompanying interpreter (in `src/mode.js`) to create a CodeMirror
 23 | mode.
 24 | 
 25 | ## Grammar syntax
 26 | 
 27 | A grammar is a set of rules. Rules may appear on the top level or
 28 | within `tokens` or `skip` blocks. The rules within `tokens` are
 29 | considered the base token types of the language, and will be fallen
 30 | back on when nothing else matches. A `skip` block is used to
 31 | automatically insert whitespace-like productions between the elements
 32 | of the rules inside of it.
 33 | 
 34 | Each rule has a name, optionally preceded by the keyword `context` to
 35 | mark it as a rule for which a context has to be pushed onto the
 36 | context stack. Contexts can be used by external code to do things like
 37 | computing indentation based on what rules are currently active.
 38 | 
 39 | After the rule name, you can add an equals sign and a quoted string to
 40 | set a token type for the rule (for example `num="number"` in the
 41 | example). That token type will be used to highlight the text that
 42 | matches the rule.
 43 | 
 44 | Each rule contains a match expression, which is built up like this:
 45 | 
 46 |  - A `"literal string"` (using JSON string syntax) matches that exact
 47 |    text.
 48 | 
 49 |  - An underscore matches any character, and a period matches any
 50 |    character except newlines.
 51 | 
 52 |  - A character range is written as two single-character strings
 53 |    with a dash in between.
 54 | 
 55 |  - An unquoted word is a reference to another rule.
 56 | 
 57 |  - Multiple expressions separated by whitespace indicate that these
 58 |    things must match in sequence.
 59 | 
 60 |  - Parentheses can be used around expressions to group them.
 61 | 
 62 |  - Multiple expressions separated by pipe characters indicate a choice
 63 |    between those expressions. The first choice that matches is taken.
 64 | 
 65 |  - A `+`, `*`, or `?` after an expression allows that expression to
 66 |    occur one or more (`+`), zero or more (`*`), or zero or one (`?`)
 67 |    times. This is done greedily — as many repetitions as possible are
 68 |    matched.
 69 | 
 70 |  - A `~` or `!` character followed by an expression denotes a
 71 |    lookahead — positive lookahead for `~` and negative for `!`.
 72 | 
 73 |  - An `&` followed by a name is a call to a predicate. This is an
 74 |    external function that will be called to determine whether a given
 75 |    position matches.
 76 | 
 77 | ## Single-edge lookahead
 78 | 
 79 | A grammar is compiled to a set of state machines, whose edges are
 80 | regular expressions, possibly extended with predicate calls and
 81 | lookaheads, or calls to rules. When parsing, the interpreter will take
 82 | the first edge that matches and consumes input, without looking ahead
 83 | further.
 84 | 
 85 | The catch is that you have to somehow write your grammar so that the
 86 | right choice is made at every point. If something is ambiguous, the
 87 | parser will just always take the first path. So, depending on your
 88 | grammar, you might have to insert lookaheads to disambiguate things.
 89 | For example, to distinguish between a variable and a label in a C-like
 90 | language, you'd need rules something like this:
 91 | 
 92 |     Statement {
 93 |       label ":" |
 94 |       variable |
 95 |       otherThing
 96 |     }
 97 |     
 98 |     label="meta" { letter+ ~(spaceChar* ":") }
 99 |     variable="variable" { letter+ }
100 | 
101 | ## Command-line parameters
102 | 
103 | The `grammar-mode` command expects a file as argument, or will read
104 | from standard input when not given one. Other, optional, arguments
105 | include:
106 | 
107 |  * `--output file` specifies a file to write the output to (defaults
108 |    to standard output).
109 | 
110 |  * `--es-module` tells the tool to output an ES6 module (default is a
111 |    CommonJS module).
112 | 
113 |  * `--graph` will cause it to output a graph in .dot format instead of
114 |    a JavaScript module. Can be useful for debugging.
115 | 
116 |  * `--names` will cause the JavaScript output to be more verbose but
117 |    easier to read, using string names rather than numbers for the
118 |    nodes.
119 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "codemirror-grammar-mode",
 3 |   "version": "0.1.10",
 4 |   "description": "Experimental approach to writing CodeMirror modes",
 5 |   "bin": {
 6 |     "grammar-mode": "./src/grammar-mode.js"
 7 |   },
 8 |   "main": "src/mode.js",
 9 |   "scripts": {
10 |     "test": "mocha test/test-*.js"
11 |   },
12 |   "repository": {
13 |     "type": "git",
14 |     "url": "https://github.com/codemirror/grammar-mode/"
15 |   },
16 |   "keywords": [
17 |     "syntax",
18 |     "highlighting",
19 |     "editor",
20 |     "codemirror",
21 |     "mode",
22 |     "grammar",
23 |     "parser"
24 |   ],
25 |   "author": "Marijn Haverbeke <marijnh@gmail.com>",
26 |   "license": "MIT",
27 |   "devDependencies": {
28 |     "codemirror": "^5.25.2"
29 |   }
30 | }
31 | 


--------------------------------------------------------------------------------
/src/ast.js:
--------------------------------------------------------------------------------
  1 | function build(type, from, props) {
  2 |   props.type = type
  3 |   props.start = from.start
  4 |   props.end = from.end
  5 |   return props
  6 | }
  7 | 
  8 | function noSkipAfter(node) {
  9 |   let t = node.type
 10 |   return t == "LookaheadMatch" || t == "PredicateMatch" || t == "Label" ||
 11 |     t == "RepeatedMatch" && node.kind != "?"
 12 | }
 13 | 
 14 | // Replaces super matches, inserts skip matches in the appropriate
 15 | // places, splits string matches with newlines, and collapses nested
 16 | // sequence/choice expressions, so that further passes don't have to
 17 | // worry about those.
 18 | let normalizeExpr = exports.normalizeExpr = function(expr, ruleName, superGrammar, skip, prefix) {
 19 |   if (expr.type == "StringMatch" && expr.value.length > 1 && expr.value.indexOf("\n") > -1) {
 20 |     let exprs = []
 21 |     expr.value.split(/\n/).forEach((part, i) => {
 22 |       if (i) exprs.push(build("StringMatch", expr, {value: "\n"}))
 23 |       if (part.length) exprs.push(build("StringMatch", expr, {value: part}))
 24 |     })
 25 |     return build("SequenceMatch", expr, {exprs})
 26 |   } else if (expr.type == "RuleIdentifier") {
 27 |     for (let i = 0; i < expr.arguments.length; i++)
 28 |       expr.arguments[i] = normalizeExpr(expr.arguments[i], ruleName, superGrammar, skip, prefix)
 29 |     if (prefix) expr.id.name = prefix + expr.id.name
 30 |   } else if (expr.type == "RepeatedMatch") {
 31 |     let inner = normalizeExpr(expr.expr, ruleName, superGrammar, skip, prefix)
 32 |     if (skip && expr.kind != "?") inner = build("SequenceMatch", inner, {exprs: [inner, skip]})
 33 |     expr.expr = inner
 34 |   } else if (expr.type == "LookaheadMatch") {
 35 |     expr.expr = normalizeExpr(expr.expr, ruleName, null, skip, prefix)
 36 |   } else if (expr.type == "SequenceMatch") {
 37 |     let exprs = []
 38 |     for (let i = 0; i < expr.exprs.length; i++) {
 39 |       let next = normalizeExpr(expr.exprs[i], ruleName, superGrammar, skip, prefix)
 40 |       if (next.type == "SequenceMatch") exprs = exprs.concat(next.exprs)
 41 |       else exprs.push(next)
 42 |       if (skip && i < expr.exprs.length - 1 && !noSkipAfter(next))
 43 |         exprs.push(skip)
 44 |     }
 45 |     expr.exprs = exprs
 46 |   } else if (expr.type == "ChoiceMatch") {
 47 |     let exprs = []
 48 |     for (let i = 0; i < expr.exprs.length; i++) {
 49 |       let next = normalizeExpr(expr.exprs[i], ruleName, superGrammar, skip, prefix)
 50 |       if (next.type == "ChoiceMatch") exprs = exprs.concat(next.exprs)
 51 |       else exprs.push(next)
 52 |     }
 53 |     expr.exprs = exprs
 54 |   } else if (expr.type == "SuperMatch") {
 55 |     for (let grammar = superGrammar; grammar; grammar = grammar.super) {
 56 |       let rule = grammar.rules[ruleName]
 57 |       if (rule) return normalizeExpr(rule.expr, ruleName, grammar.super, skip, prefix)
 58 |     }
 59 |     throw new SyntaxError(`No super rule found for '${ruleName}'`)
 60 |   }
 61 |   return expr
 62 | }
 63 | 
 64 | let eqExpr = exports.eqExpr = function(a, b) {
 65 |   if (a.type != b.type) return false
 66 |   if (a.type == "StringMatch") return a.value == b.value
 67 |   if (a.type == "CharacterRange") return a.from == b.from && a.to == b.to
 68 |   if (a.type == "AnyMatch" || a.type == "DotMatch") return true
 69 |   if (a.type == "RuleIdentifier") return a.id.name == b.id.name && eqExprs(a.arguments, b.arguments)
 70 |   if (a.type == "RepeatedMatch" || a.type == "LookaheadMatch") return a.kind == b.kind && eqExpr(a.expr, b.expr)
 71 |   if (a.type == "SequenceMatch" || a.type == "ChoiceMatch") return eqExprs(a.exprs, b.exprs)
 72 |   if (a.type == "PredicateMatch") return a.id.name == b.id.name
 73 |   throw new Error("Missed case in eqExpr: " + a.type)
 74 | }
 75 | 
 76 | let eqExprs = exports.eqExprs = function(a, b) {
 77 |   if (a.length != b.length) return false
 78 |   for (let i = 0; i < a.length; i++) if (!eqExpr(a[i], b[i])) return false
 79 |   return true
 80 | }
 81 | 
 82 | function instantiateArray(params, args, exprs) {
 83 |   let updated = null
 84 |   for (let i = 0; i < exprs.length; i++) {
 85 |     let cur = exprs[i], inst = instantiateArgs(params, args, cur)
 86 |     if (cur != inst && !updated) updated = exprs.slice(0, i)
 87 |     if (updated) updated.push(inst)
 88 |   }
 89 |   return updated || exprs
 90 | }
 91 | 
 92 | let instantiateArgs = exports.instantiateArgs = function(params, args, expr) {
 93 |   if (expr.type == "RuleIdentifier") {
 94 |     let pos = params.indexOf(expr.id.name)
 95 |     if (pos > -1) {
 96 |       if (expr.arguments.length) throw new Error("Arguments to params not supported yet")
 97 |       return args[pos]
 98 |     }
 99 |     let newArgs = instantiateArray(params, args, expr.arguments)
100 |     return newArgs == expr.arguments ? expr : build(expr.type, expr, {id: expr.id, arguments: newArgs})
101 |   } else if (expr.type == "RepeatedMatch" || expr.type == "LookaheadMatch") {
102 |     let inst = instantiateArgs(params, args, expr.expr)
103 |     return inst != expr.expr ? build(expr.type, expr, {expr: inst, kind: expr.kind}) : expr
104 |   } else if (expr.type == "SequenceMatch" || expr.type == "ChoiceMatch") {
105 |     let updated = instantiateArray(params, args, expr.exprs)
106 |     return updated != expr.exprs ? build(expr.type, expr, {exprs: updated}) : expr
107 |   } else {
108 |     return expr
109 |   }
110 | }
111 | 
// Calls `f` on `expr` and then, depth-first, on the expressions
// nested inside of it. When `f` returns exactly `false` for a node,
// that node's children are not visited.
function forEachExpr(expr, f) {
  if (f(expr) === false) return

  let children
  switch (expr.type) {
  case "RepeatedMatch":
  case "LookaheadMatch":
    children = [expr.expr]
    break
  case "SequenceMatch":
  case "ChoiceMatch":
    children = expr.exprs
    break
  case "RuleIdentifier":
    children = expr.arguments
    break
  default:
    return
  }
  for (const child of children) forEachExpr(child, f)
}
121 | exports.forEachExpr = forEachExpr
122 | 


--------------------------------------------------------------------------------
/src/compile.js:
--------------------------------------------------------------------------------
 1 | const {Call, Token} = require("./graph")
 2 | 
 3 | function buildEdgeInfo(graphs, getName, options) {
 4 |   let edgeList = [], matchN = 0
 5 | 
 6 |   for (let name in graphs) {
 7 |     let graph = graphs[name]
 8 |     for (let node = 0; node < graph.nodes.length; node++) {
 9 |       let edges = graph.nodes[node], nodeName = getName(name, node)
10 |       for (let i = 0; i < edges.length; i++) {
11 |         let {match, effect, to} = edges[i], matchStr = match.toExpr(getName)
12 |         let useMatch = -1
13 |         if (matchStr.length > 8 && !options.names) for (let j = 0; j < edgeList.length; j++) {
14 |           let other = edgeList[j]
15 |           if (other.match == matchStr) {
16 |             useMatch = other.useMatch == -1 ? other.useMatch = matchN++ : other.useMatch
17 |             break
18 |           }
19 |         }
20 |         edgeList.push({
21 |           from: nodeName,
22 |           to,
23 |           match: useMatch == -1 ? matchStr : null,
24 |           useMatch,
25 |           effect,
26 |           graph: name
27 |         })
28 |       }
29 |     }
30 |   }
31 |   return edgeList
32 | }
33 | 
// Produces the source text for one edge, as a comma-separated list
// of array elements. An edge can be one of the following:
// 0, nextNode                           null edge
// 1, callTarget, returnTo               regular call
// 2, callTarget, returnTo, context      context call
// 3, tokenType, matchExpr, nextNode     token edge
// matchExpr, nextNode                   regular match edge
// A missing target node is written as -1.
function compileEdge(edgeInfo, getName) {
  const {effect, useMatch} = edgeInfo
  const to = edgeInfo.to == null ? -1 : getName(edgeInfo.graph, edgeInfo.to)

  if (effect instanceof Call) {
    const callee = getName(effect.target.name)
    return effect.context
      ? `2, ${callee}, ${to}, ${JSON.stringify(effect.context)}`
      : `1, ${callee}, ${to}`
  }

  // Shared match expressions are referenced through the `e` vector
  const match = useMatch != -1 ? `e[${useMatch}]` : edgeInfo.match
  if (effect instanceof Token)
    return `3, ${JSON.stringify(effect.type)}, ${match}, ${to}`
  return match == "null" ? `0, ${to}` : `${match}, ${to}`
}
54 | 
// Creates the function that maps (graphName, node) to the identifier
// used for that node in the output. With `options.names` this is a
// JSON string literal holding the graph name (plus "$" and the node
// index for nodes other than 0). Otherwise it is a number: the graphs'
// nodes are laid out one graph after the other, and the node's index
// is added to its graph's offset.
function buildNamer(graphs, options) {
  if (options.names)
    return (graphName, node) => JSON.stringify(`${graphName}${node ? "$" + node : ""}`)

  const offsets = {}
  let total = 0
  for (const name in graphs) {
    offsets[name] = total
    total += graphs[name].nodes.length
  }
  return (graphName, node) => offsets[graphName] + (node || 0)
}
67 | 
68 | module.exports = function(graphs, options = {}) {
69 |   let getName = buildNamer(graphs, options)
70 |   let edgeInfo = buildEdgeInfo(graphs, getName, options)
71 | 
72 |   let exprVector = []
73 |   for (let i = 0; i < edgeInfo.length; i++) {
74 |     let info = edgeInfo[i]
75 |     if (info.useMatch > -1 && info.match) exprVector[info.useMatch] = info.match
76 |   }
77 | 
78 |   let code = "", exp = options.esModule ? "export var " : "exports."
79 |   if (exprVector.length) code += `var e = [${exprVector.join(", ")}]\n`
80 |   let edges = [], nodes = []
81 |   for (let curNode = edgeInfo[0].from, i = 0;; i++) {
82 |     let info = edgeInfo[i]
83 |     if (!info || info.from != curNode) {
84 |       if (options.names) nodes.push(`${curNode}: [\n    ${edges.join(",\n    ")}\n  ]`)
85 |       else nodes.push(`[${edges.join(",\n   ")}]`)
86 |       if (!info) break
87 |       curNode = info.from
88 |       edges.length = 0
89 |     }
90 |     edges.push(compileEdge(info, getName))
91 |   }
92 |   code += `${exp}nodes = ${options.names ? "{" : "["}\n  ${nodes.join(",\n  ")}\n${options.names ? "}" : "]"}\n`
93 |   code += `${exp}start = ${getName("_start")}\n`
94 |   if (options.token !== false)
95 |     code += `${exp}token = ${getName("_token")}\n`
96 | 
97 |   return code
98 | }
99 | 


--------------------------------------------------------------------------------
/src/grammar-mode.js:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env node
 2 | 
 3 | const parse = require("./parse")
 4 | const compile = require("./compile")
 5 | const {buildGraph} = require("./graph")
 6 | const path = require("path"), fs = require("fs")
 7 | 
 8 | let input = null, outputGraph = false, names = false, token = true, esModule = false, output = null
 9 | 
10 | // declare global: process
11 | for (let i = 2; i < process.argv.length; i++) {
12 |   let arg = process.argv[i]
13 |   if (arg == "--graph") outputGraph = true
14 |   else if (arg == "--no-token") token = false
15 |   else if (arg == "--es-module") esModule = true
16 |   else if (arg == "--names") names = true
17 |   else if (arg == "--output") output = process.argv[++i]
18 |   else if (arg == "--help") usage(0)
19 |   else if (input || arg[0] == "-") usage(1)
20 |   else input = arg
21 | }
22 | 
// Prints the command-line synopsis and exits the process with the
// given code. The text goes to standard error when the code is
// non-zero, and to standard output otherwise.
function usage(code) {
  const stream = code ? process.stderr : process.stdout
  stream.write(
    "grammar-mode [file] [--output file] [--es-module] [--no-token] [--graph] [--names]\n"
  )
  process.exit(code)
}
29 | 
// Entry point: compile the grammar in the given file or, when no file
// was named, the grammar read from standard input. Relative paths in
// the grammar are resolved against the file's directory or, for
// standard input, the current working directory.
if (input) {
  out(run(parseWithSuper(path.dirname(input), fs.readFileSync(input, "utf8"), input)))
} else {
  let buffer = ""
  // Have the stream do the UTF-8 decoding. Decoding each chunk on its
  // own corrupts a multi-byte character that happens to be split
  // across two chunks.
  process.stdin.setEncoding("utf8")
  process.stdin.resume()
  process.stdin.on("data", chunk => buffer += chunk)
  process.stdin.on("end", () => out(run(parseWithSuper(process.cwd(), buffer, null))))
}
38 | 
// Parses grammar source `input` and, recursively, the grammars it
// refers to: the grammar named by `extends` is stored in `ast.super`,
// and each included grammar in the `ast` property of its entry in
// `ast.included`. Paths are resolved relative to `base`; `fileName`
// is passed on to the parser.
function parseWithSuper(base, input, fileName) {
  const ast = parse(input, fileName)
  const load = relative => {
    const file = path.resolve(base, relative)
    return parseWithSuper(path.dirname(file), fs.readFileSync(file, "utf8"), file)
  }

  if (ast.extends) ast.super = load(ast.extends)
  for (const inc of ast.included) inc.ast = load(inc.value)
  return ast
}
51 | 
// Builds the graphs for a parsed grammar and returns the requested
// output as a string: a `digraph` wrapping the text of every graph
// when --graph was given, the compiled module otherwise.
function run(ast) {
  const options = {token, esModule, names}
  const graphs = buildGraph(ast, options)
  if (!outputGraph) return compile(graphs, options)

  const body = Object.keys(graphs).map(k => graphs[k].toString()).join("")
  return `digraph{\n${body}}\n`
}
60 | 
// Writes the result to the --output file if one was given, and to
// standard output if not.
function out(string) {
  if (!output) {
    process.stdout.write(string, "utf8")
    return
  }
  fs.writeFileSync(output, string, "utf8")
}
65 | 


--------------------------------------------------------------------------------
/src/graph.js:
--------------------------------------------------------------------------------
  1 | const {nullMatch, anyMatch, dotMatch, StringMatch, RangeMatch, SeqMatch,
  2 |        ChoiceMatch, RepeatMatch, LookaheadMatch, PredicateMatch} = require("./matchexpr")
  3 | const {normalizeExpr, eqExprs, instantiateArgs, forEachExpr} = require("./ast")
  4 | 
  5 | exports.buildGraph = function(grammar, options) {
  6 |   let {rules, start, tokens} = gatherRules(grammar)
  7 |   countReferences(rules, start, tokens)
  8 |   let cx = new Context(rules, Object.create(null))
  9 |   let startGraph = cx.registerGraph("_start", new SubGraph), after = startGraph.node()
 10 |   startGraph.copy(0, after, cx.evalCall(start, []))
 11 |   startGraph.edge(after, 0, anyMatch)
 12 |   let startGraphs = ["_start"]
 13 | 
 14 |   if (options.token !== false) {
 15 |     let tokenGraph = cx.registerGraph("_token", new SubGraph)
 16 |     for (let i = 0; i < tokens.length; i++)
 17 |       tokenGraph.copy(0, null, cx.evalCall(tokens[i], []))
 18 |     tokenGraph.edge(0, null, anyMatch)
 19 |     startGraphs.push("_token")
 20 |   }
 21 | 
 22 |   return gcGraphs(cx.graphs, startGraphs)
 23 | }
 24 | 
 25 | class Call {
 26 |   constructor(target, context) { this.target = target; this.context = context }
 27 |   toString() { return `CALL(${this.target.name})` }
 28 | }
 29 | exports.Call = Call
 30 | class Token {
 31 |   constructor(type) { this.type = type }
 32 |   toString() { return `TOKEN(${this.type})` }
 33 | }
 34 | exports.Token = Token
 35 | 
 36 | class Edge {
 37 |   constructor(to, match, effect) {
 38 |     this.to = to
 39 |     this.match = match
 40 |     this.effect = effect
 41 |   }
 42 | 
 43 |   toString(graph, from) {
 44 |     let result = `${graph}_${from} -> ${graph}_${this.to == null ? "RET" : this.to}`, label = this.match.toRegexp()
 45 |     if (this.effect) label = (label ? label + " " : "") + this.effect.toString()
 46 |     if (label) result += `[label=${JSON.stringify(label)}]`
 47 |     return result
 48 |   }
 49 | 
 50 |   canCombine(other) {
 51 |     if (this.effect instanceof Call || other.effect instanceof Call) return false
 52 |     let thisIsolated = this.match.isolated || !!this.effect, otherIsolated = other.match.isolated || !!other.effect
 53 |     return thisIsolated ? other.match.isNull && !otherIsolated
 54 |       : otherIsolated ? this.match.isNull
 55 |       : true
 56 |   }
 57 | }
 58 | 
 59 | class Rule {
 60 |   constructor(name, expr, params, context) {
 61 |     this.name = name
 62 |     this.expr = expr
 63 |     this.params = params
 64 |     this.context = context
 65 |     this.instances = []
 66 |     this.recursive = null
 67 |     this.refcount = 0
 68 |   }
 69 | 
 70 |   getInstance(cx, args) {
 71 |     for (let i = 0; i < this.instances.length; i++) {
 72 |       let inst = this.instances[i]
 73 |       if (eqExprs(inst.args, args)) {
 74 |         if (this.recursive !== false)
 75 |           this.recursive = true
 76 |         return inst.graph
 77 |       }
 78 |     }
 79 |     let graph = cx.registerGraph(this.name, new SubGraph)
 80 |     this.instances.push({args, graph})
 81 |     let result = cx.evalExpr(instantiateArgs(this.params, args, this.expr))
 82 |     graph.nodes = result.nodes
 83 |     if (this.recursive === null) this.recursive = false
 84 |     return graph
 85 |   }
 86 | }
 87 | 
 88 | class SubGraph {
 89 |   constructor() {
 90 |     this.name = null
 91 |     this.nodes = [[]]
 92 |   }
 93 | 
 94 |   get edgeCount() {
 95 |     let count = 0
 96 |     for (let i = 0; i < this.nodes.length; i++)
 97 |       count += this.nodes[i].length
 98 |     return count
 99 |   }
100 | 
101 |   node() {
102 |     return this.nodes.push([]) - 1
103 |   }
104 | 
105 |   edge(from, to, match, effect) {
106 |     this.nodes[from].push(new Edge(to, match, effect))
107 |   }
108 | 
109 |   copy(from, to, source, start = 0) {
110 |     let mapping = []
111 |     mapping[start] = from
112 |     let work = [start], workIndex = 0
113 |     while (workIndex < work.length) {
114 |       let cur = work[workIndex++], edges = source.nodes[cur]
115 |       for (let i = 0; i < edges.length; i++) {
116 |         let edge = edges[i]
117 |         if (edge.to != null && work.indexOf(edge.to) == -1) {
118 |           mapping[edge.to] = this.node()
119 |           work.push(edge.to)
120 |         }
121 |         this.edge(mapping[cur], edge.to == null ? to : mapping[edge.to], edge.match, edge.effect)
122 |       }
123 |     }
124 |   }
125 | 
126 |   join(mayHaveOutgoing) {
127 |     let found = []
128 |     this.edges((e, n) => { if (e.to == null) found.push(n, e) })
129 |     if (found.length == 2) {
130 |       let edge = found[1], node = this.nodes[found[0]]
131 |       if (edge.match == nullMatch && !edge.effect && (mayHaveOutgoing || node.length == 1)) {
132 |         node.splice(node.indexOf(edge), 1)
133 |         return found[0]
134 |       }
135 |     }
136 |     let add = this.node()
137 |     for (let i = 1; i < found.length; i += 2) found[i].to = add
138 |     return add
139 |   }
140 | 
141 |   edges(f) {
142 |     for (let i = 0; i < this.nodes.length; i++) {
143 |       let edges = this.nodes[i]
144 |       for (let j = 0; j < edges.length; j++) f(edges[j], i)
145 |     }
146 |   }
147 | 
148 |   countReferences(node) {
149 |     let count = 0
150 |     this.edges(e => { if (e.to == node) count++ })
151 |     return count
152 |   }
153 | 
154 |   toString() {
155 |     let output = ""
156 |     this.edges((e, n) => output += "  " + e.toString(this.name, n) + ";\n")
157 |     return output
158 |   }
159 | 
160 |   singleEdgeFrom(node) {
161 |     let edges = this.nodes[node]
162 |     return edges.length == 1 ? edges[0] : null
163 |   }
164 | 
165 |   singleEdgeTo(node) {
166 |     let found = null
167 |     this.edges(e => {
168 |       if (e.to == node) found = found == null ? e : false
169 |     })
170 |     return found === false ? null : found
171 |   }
172 | 
173 |   get simple() {
174 |     if (this.nodes.length != 1) return null
175 |     let node = this.nodes[0]
176 |     if (node.length != 1 || node[0].effect) return null
177 |     return node[0].match
178 |   }
179 | 
180 |   static simple(match, effect) {
181 |     let graph = new SubGraph
182 |     graph.edge(0, null, match, effect)
183 |     return graph
184 |   }
185 | }
186 | 
// Shared single-edge graphs that Context.evalExpr returns for
// AnyMatch and DotMatch nodes. The same instances are handed out
// every time.
SubGraph.any = SubGraph.simple(anyMatch)
SubGraph.dot = SubGraph.simple(dotMatch)
189 | 
// Upper bound on `refcount * edgeCount` up to which Context.evalCall
// returns a rule's graph for inlining instead of creating an edge
// that calls it.
const MAX_INLINE_BLOWUP = 20

// Turns expressions into graphs, and keeps the registry of named
// graphs that are created while doing so.
class Context {
  // rules: object mapping rule names to Rule objects (read by evalCall)
  // graphs: object in which registerGraph stores graphs by name
  constructor(rules, graphs) {
    this.rules = rules
    this.graphs = graphs
  }

  // Stores `graph` under `name`, or under `name_1`, `name_2`, and so
  // on when that name is already in use. Sets `graph.name` to the
  // name that was picked and returns the graph.
  registerGraph(name, graph) {
    for (let i = 0;; i++) {
      let cur = name + (i ? "_" + i : "")
      if (!(cur in this.graphs)) {
        graph.name = cur
        return this.graphs[cur] = graph
      }
    }
  }

  // Builds the graph for a single expression node, dispatching on its
  // type. Throws for node types it does not recognize.
  evalExpr(expr) {
    let t = expr.type
    if (t == "CharacterRange") {
      return SubGraph.simple(new RangeMatch(expr.from, expr.to))
    } else if (t == "StringMatch") {
      return SubGraph.simple(new StringMatch(expr.value))
    } else if (t == "AnyMatch") {
      return SubGraph.any
    } else if (t == "DotMatch") {
      return SubGraph.dot
    } else if (t == "RuleIdentifier") {
      return this.evalCall(expr.id.name, expr.arguments)
    } else if (t == "RepeatedMatch") {
      return this.evalRepeat(expr.expr, expr.kind)
    } else if (t == "SequenceMatch") {
      return this.evalSequence(expr.exprs)
    } else if (t == "ChoiceMatch") {
      return this.evalChoice(expr.exprs)
    } else if (t == "LookaheadMatch") {
      let inner = this.evalExpr(expr.expr), simple = inner.simple, match
      // A lookahead whose body is a single effect-less edge holds that
      // match directly. Any other body is registered as a separate
      // `_lookahead` graph that the match refers to. The last argument
      // is true for `~` lookaheads and false for other kinds.
      if (simple) {
        match = new LookaheadMatch(null, simple, expr.kind == "~")
      } else {
        this.registerGraph("_lookahead", inner)
        match = new LookaheadMatch(inner, null, expr.kind == "~")
      }
      return SubGraph.simple(match)
    } else if (t == "PredicateMatch") {
      return SubGraph.simple(new PredicateMatch(expr.id.name))
    } else {
      throw new Error("Unrecognized AST node type " + t)
    }
  }

  // Builds the graph for a reference to the rule `name` with the
  // given argument expressions. Throws when the number of arguments
  // doesn't match the rule's parameters. The result is one of:
  //  - a single edge holding the rule's match, when the rule's graph
  //    is simple (with a Token effect when its context has a token type)
  //  - the rule's own graph, for the caller to copy, when the rule is
  //    not recursive, has no context, and is referenced once or is
  //    small enough (see MAX_INLINE_BLOWUP)
  //  - a single null-match edge with a Call effect otherwise
  // NOTE(review): a `name` that is not in `this.rules` fails with a
  // TypeError on `rule.params` here, unless such references are
  // rejected before this point -- confirm.
  evalCall(name, args) {
    let rule = this.rules[name]
    if (args.length != rule.params.length) throw new Error("Wrong number of arguments for " + name)
    let graph = rule.getInstance(this, args), simple = graph.simple
    if (simple)
      return SubGraph.simple(simple, rule.context && rule.context.token ? new Token(rule.context.token) : null)
    else if (!rule.recursive && !rule.context && (rule.refcount == 1 || rule.refcount * graph.edgeCount <= MAX_INLINE_BLOWUP))
      return graph
    else
      return SubGraph.simple(nullMatch, new Call(graph, rule.context))
  }

  // Builds the graph for `expr` repeated according to `kind` ("*",
  // "+", or "?"). When the inner graph is a single match that is not
  // isolated, the result is a single RepeatMatch edge. Otherwise the
  // repetition is spelled out with null edges.
  // FIXME there's still a bug here that showed up when doing listOf(x? y)
  evalRepeat(expr, kind) {
    let inner = this.evalExpr(expr), simple
    if ((simple = inner.simple) && !simple.isolated)
      return SubGraph.simple(new RepeatMatch(simple, kind))
    let graph = new SubGraph
    if (kind == "*") {
      // The body leads back to node 0, which can also be left directly
      graph.copy(0, 0, inner)
      graph.edge(0, null, nullMatch)
    } else if (kind == "+") {
      // The body leads to `next`, from where one can start over or leave
      let next = graph.node()
      graph.copy(0, next, inner)
      graph.edge(next, 0, nullMatch)
      graph.edge(next, null, nullMatch)
    } else if (kind == "?") {
      // Either go through the body once, or leave right away
      graph.copy(0, null, inner)
      graph.edge(0, null, nullMatch)
    }
    return graph
  }

  // Builds the graph that matches the given expressions one after
  // the other. Where the graph built so far ends in a single edge
  // and the next graph starts with a single effect-less edge that
  // can be combined with it, the two matches are fused into one
  // SeqMatch instead of being kept as separate edges.
  evalSequence(exprs) {
    // NOTE(review): SubGraph.edge has no return value, so `edge`
    // starts out undefined and the initial null edge never takes part
    // in fusing -- confirm that this is intended.
    let graph = new SubGraph, edge = graph.edge(0, null, nullMatch)
    for (let i = 0; i < exprs.length; i++) {
      let next = this.evalExpr(exprs[i])
      let firstEdge, copyFrom = 0
      if (edge && (firstEdge = next.singleEdgeFrom(0)) && !firstEdge.effect && edge.canCombine(firstEdge)) {
        edge.match = SeqMatch.create(edge.match, firstEdge.match)
        // The first edge is accounted for, continue from its target
        copyFrom = firstEdge.to
      }
      // A null `copyFrom` means the fused edge was all there was to `next`
      if (copyFrom != null) {
        // NOTE(review): despite the name, this appears to be true when
        // nothing but the (fused) first edge points at `copyFrom`.
        let hasIncoming = next.countReferences(copyFrom) == (copyFrom == 0 ? 0 : 1)
        graph.copy(graph.join(hasIncoming), null, next, copyFrom)
        if (i < exprs.length - 1)
          edge = graph.singleEdgeTo(null)
      }
    }
    return graph
  }

  // Builds the graph that matches one of the given expressions, with
  // the edges for earlier alternatives coming first. Runs of
  // adjacent alternatives that are simple matches are merged into a
  // single ChoiceMatch edge.
  evalChoice(exprs) {
    let graph = new SubGraph
    // `next` holds a graph that was already evaluated while looking
    // for more simple alternatives, so that it isn't evaluated twice
    for (let i = 0, last = exprs.length - 1, next = null; i <= last; i++) {
      let curGraph = next || this.evalExpr(exprs[i]), simple = curGraph.simple
      next = null
      if (simple) {
        while (i < last) {
          let nextExpr = this.evalExpr(exprs[i + 1]), nextSimple = nextExpr.simple
          if (nextSimple) {
            simple = ChoiceMatch.create(simple, nextSimple)
            i++
          } else {
            next = nextExpr
            break
          }
        }
        graph.edge(0, null, simple)
      } else {
        let start = 0
        // When edges in the alternative point back at its node 0, it
        // is given a start node of its own, reached through a null
        // edge -- presumably so that going back there doesn't make
        // the other alternatives available again.
        if (curGraph.countReferences(0) > 0)
          graph.edge(0, start = graph.node(), nullMatch)
        graph.copy(start, null, curGraph)
      }
    }
    return graph
  }
}
321 | 
// Collect the rules of a grammar, its super grammar, and its included
// grammars into a single table.
//
// Returns `{rules, start, tokens}`: `rules` maps (prefixed) rule names to
// Rule objects, `start` is the name of the start rule, and `tokens` lists
// the names of rules declared as tokens.
function gatherRules(grammar) {
  let info = {rules: Object.create(null), start: null, tokens: []}
  function gather(grammar, prefix) {
    let explicitStart = null
    for (let name in grammar.rules) {
      let ast = grammar.rules[name]
      if (ast.start) {
        if (explicitStart) throw new Error("Multiple start rules")
        explicitStart = name
      }
      let ruleName = prefix + name
      // A rule that was registered earlier under this name wins, so rules
      // of the extending grammar take precedence over those of its super
      // grammar (which is gathered afterwards).
      if (info.rules[ruleName]) continue
      let expr = normalizeExpr(ast.expr, name, grammar.super, ast.skip, prefix)
      // The last argument is null for plain rules, {name, token} for rules
      // with a token type, and {name} for context rules without one.
      info.rules[ruleName] = new Rule(ruleName, expr, ast.params.map(n => prefix + n.name),
                                      !ast.context && !ast.tokenType ? null : ast.tokenType ? {name: ruleName, token: ast.tokenType}
                                      : {name: ruleName})
    }
    if (grammar.super) gather(grammar.super, prefix)
    for (let i = 0; i < grammar.included.length; i++) {
      let inc = grammar.included[i]
      gather(inc.ast, prefix + inc.id.name + ".")
    }
    // This runs after the recursive calls, so an explicit start rule in
    // this grammar overrides whatever the nested grammars selected.
    if (explicitStart) info.start = explicitStart
    for (let name in grammar.rules) {
      // Without any explicit start rule, the first rule seen becomes the start
      if (info.start == null) info.start = name
      // NOTE(review): `start` and token names are recorded without
      // `prefix`, although rules of included grammars are stored under
      // prefixed names — confirm this is intended.
      if (grammar.rules[name].isToken && info.tokens.indexOf(name) == -1) info.tokens.push(name)
    }
  }
  gather(grammar, "")
  return info
}
353 | 
// Tally, in each rule's `refcount`, how often the rule is referenced.
// The start rule and every token rule get one reference up front.
// References from inside parameterized rules, and references that occur
// in rule arguments, weigh 2; all others weigh 1. Throws when a
// referenced rule does not exist.
function countReferences(rules, start, tokens) {
  const bump = (ruleName, amount) => {
    const target = rules[ruleName]
    if (!target) throw new Error("Undefined rule " + ruleName)
    target.refcount += amount
  }

  // Creates a forEachExpr callback that counts rule references, skipping
  // names that refer to one of `params`.
  const visitor = (amount, params) => expr => {
    if (expr.type != "RuleIdentifier") return
    const refName = expr.id.name
    if (params.indexOf(refName) == -1) bump(refName, amount)
    const args = expr.arguments
    for (let k = 0; k < args.length; k++)
      forEachExpr(args[k], visitor(2, params))
    // Arguments were handled above, so tell forEachExpr not to descend
    return false
  }

  bump(start, 1)
  for (let t = 0; t < tokens.length; t++) bump(tokens[t], 1)

  for (const ruleName in rules) {
    const {expr, params} = rules[ruleName]
    const amount = params.length ? 2 : 1
    forEachExpr(expr, visitor(amount, params))
  }
}
380 | 
// Drop graphs that can't be reached from `startNames`. A graph counts as
// reachable when a reachable graph has an edge whose effect is a Call to
// it, or an edge whose match contains a LookaheadMatch starting in it.
// Returns a new prototype-less object holding only the reachable graphs.
function gcGraphs(graphs, startNames) {
  const reachable = startNames.slice()
  const enqueue = name => {
    if (reachable.indexOf(name) < 0) reachable.push(name)
  }

  // `reachable` grows while it is being scanned, acting as a work queue
  for (let cursor = 0; cursor < reachable.length; cursor++) {
    const current = graphs[reachable[cursor]]
    current.edges(edge => {
      if (edge.effect instanceof Call) enqueue(edge.effect.target.name)
      edge.match.forEach(m => {
        if (m instanceof LookaheadMatch && m.start) enqueue(m.start.name)
      })
    })
  }

  const kept = Object.create(null)
  for (let k = 0; k < reachable.length; k++) kept[reachable[k]] = graphs[reachable[k]]
  return kept
}
400 | 


--------------------------------------------------------------------------------
/src/matchexpr.js:
--------------------------------------------------------------------------------
  1 | function escRe(str) {
  2 |   return str.replace(/[^\w ¡-￿]/g, ch => {
  3 |     if (ch == "\n") return "\\n"
  4 |     if (ch == "\t") return "\\t"
  5 |     if (ch == "\r") return "\\r"
  6 |     return "\\" + ch
  7 |   })
  8 | }
  9 | 
 10 | function toSubRegexp(expr, wrapExpr) {
 11 |   if (expr.regexpPrec < wrapExpr.regexpPrec) return `(?:${expr.toRegexp()})`
 12 |   else return expr.toRegexp()
 13 | }
 14 | 
// Opcodes that the toExpr methods below put in the first element of the
// array form they emit for match expressions that aren't output as a
// regular expression.
const OP_SEQ = 0, OP_CHOICE = 1,
      OP_STAR = 2, OP_PLUS = 3, OP_MAYBE = 4,
      OP_LOOKAHEAD = 5, OP_NEG_LOOKAHEAD = 6,
      OP_PREDICATE = 7
 19 | 
 20 | class MatchExpr {
 21 |   constructor() {}
 22 | 
 23 |   get isNull() { return false }
 24 |   get simple() { return true }
 25 |   get isolated() { return false }
 26 | 
 27 |   get regexpPrec() { return 4 }
 28 | 
 29 |   toExpr() {
 30 |     return `/^${toSubRegexp(this, SeqMatch.prototype)}/`
 31 |   }
 32 | 
 33 |   forEach(f) { f(this) }
 34 | }
 35 | 
 36 | class StringMatch extends MatchExpr {
 37 |   constructor(string) {
 38 |     super()
 39 |     this.string = string
 40 |   }
 41 | 
 42 |   get simple() { return this.string != "\n" }
 43 |   get isolated() { return !this.simple }
 44 | 
 45 |   eq(other) { return other instanceof StringMatch && other.string == this.string }
 46 | 
 47 |   toRegexp() { return escRe(this.string) }
 48 | 
 49 |   get regexpPrec() { return this.string.length == 1 ? super.regexpPrec : 2 }
 50 | 
 51 |   toExpr() { return JSON.stringify(this.string) }
 52 | }
 53 | exports.StringMatch = StringMatch
 54 | 
 55 | class RangeMatch extends MatchExpr {
 56 |   constructor(from, to) {
 57 |     super()
 58 |     this.from = from
 59 |     this.to = to
 60 |   }
 61 | 
 62 |   get simple() { return this.from > "\n" || this.to < "\n" }
 63 |   get isolated() { return !this.simple }
 64 | 
 65 |   eq(other) { return other instanceof RangeMatch && other.from == this.from && other.to == this.to }
 66 | 
 67 |   toRegexp() { return "[" + escRe(this.from) + "-" + escRe(this.to) + "]" }
 68 | }
 69 | exports.RangeMatch = RangeMatch
 70 | 
// Singleton match expression whose regexp form is `[^]` (any character,
// newlines included). It is never simple and always isolated.
const anyMatch = exports.anyMatch = new class AnyMatch extends MatchExpr {
  get simple() { return false }
  get isolated() { return true }
  // Being a singleton, it only equals itself
  eq(other) { return other == anyMatch }
  toRegexp() { return "[^]" }
}
 77 | 
// Singleton match expression whose regexp form is `.`. It keeps the
// MatchExpr defaults (simple, not isolated).
const dotMatch = exports.dotMatch = new class DotMatch extends MatchExpr {
  // Being a singleton, it only equals itself
  eq(other) { return other == dotMatch }
  toRegexp() { return "." }
}
 82 | 
 83 | const nullMatch = exports.nullMatch = new class NullMatch extends MatchExpr {
 84 |   get isNull() { return true }
 85 |   eq(other) { return other == anyMatch }
 86 |   toRegexp() { return "" }
 87 |   toExpr() { return "null" }
 88 | }
 89 | 
 90 | class SeqMatch extends MatchExpr {
 91 |   constructor(matches) {
 92 |     super()
 93 |     this.matches = matches
 94 |   }
 95 | 
 96 |   eq(other) { return other instanceof SeqMatch && eqArray(other.matches, this.matches) }
 97 | 
 98 |   get simple() {
 99 |     return this.matches.every(m => m.simple)
100 |   }
101 |   get isolated() {
102 |     return this.matches.some(m => m.isolated)
103 |   }
104 | 
105 |   get regexpPrec() { return 2 }
106 | 
107 |   toRegexp() { return this.matches.map(m => toSubRegexp(m, this)).join("") }
108 | 
109 |   toExpr(getName) {
110 |     if (this.simple) return super.toExpr()
111 |     return `[${OP_SEQ}, ${this.matches.map(m => m.toExpr(getName)).join(", ")}]`
112 |   }
113 | 
114 |   forEach(f) { f(this); this.matches.forEach(m => m.forEach(f)) }
115 | 
116 |   static create(left, right) {
117 |     if (left == nullMatch) return right
118 |     if (right == nullMatch) return left
119 | 
120 |     let before = left instanceof SeqMatch ? left.matches : [left]
121 |     let after = right instanceof SeqMatch ? right.matches : [right]
122 |     let last = before[before.length - 1], first = after[0]
123 | 
124 |     if (last instanceof StringMatch && first instanceof StringMatch) {
125 |       after[0] = new StringMatch(last.string + right.string)
126 |       before.pop()
127 |     } else if (first instanceof RepeatMatch && first.type == "*") {
128 |       if (last.eq(first.match)) {
129 |         after[0] = new RepeatMatch(last, "+")
130 |         before.pop()
131 |       } else if (first.match instanceof StringMatch && last instanceof StringMatch &&
132 |                  new RegExp(first.match.toRegexp() + "$").test(last.string)) {
133 |         after[0] = new RepeatMatch(first.match, "+")
134 |         before[before.length - 1] = new StringMatch(last.string.slice(0, last.string.length - first.match.string.length))
135 |       }
136 |     }
137 |     let matches = before.concat(after)
138 |     return matches.length == 1 ? matches[0] : new SeqMatch(matches)
139 |   }
140 | }
141 | exports.SeqMatch = SeqMatch
142 | 
// Matches any one of a set of alternatives.
class ChoiceMatch extends MatchExpr {
  constructor(matches) {
    super()
    this.matches = matches
  }

  // A choice is simple when all its alternatives are
  get simple() {
    return this.matches.every(alt => alt.simple)
  }

  // ... and isolated when any alternative is
  get isolated() {
    return this.matches.some(alt => alt.isolated)
  }

  eq(other) {
    if (!(other instanceof ChoiceMatch)) return false
    return eqArray(other.matches, this.matches)
  }

  // Character sets bind as tightly as a single character; other
  // choices have the lowest precedence
  get regexpPrec() {
    return this.isSet() ? 4 : 1
  }

  // Whether all alternatives are single characters or character ranges,
  // so that the choice can be written as a character set
  isSet() {
    return this.matches.every(alt => alt instanceof StringMatch && alt.string.length == 1 || alt instanceof RangeMatch)
  }

  // FIXME reduce to \d, \w when appropriate
  toRegexp() {
    if (!this.isSet())
      return this.matches.map(alt => toSubRegexp(alt, this)).join("|")
    const members = this.matches.map(alt => {
      if (alt instanceof StringMatch) return escRe(alt.string)
      return escRe(alt.from) + "-" + escRe(alt.to)
    })
    return `[${members.join("")}]`
  }

  // Simple choices are output as a regexp, others as an
  // [OP_CHOICE, ...alternatives] array
  toExpr(getName) {
    if (this.simple) return super.toExpr()
    const parts = this.matches.map(alt => alt.toExpr(getName))
    return `[${OP_CHOICE}, ${parts.join(", ")}]`
  }

  forEach(f) {
    f(this)
    this.matches.forEach(alt => alt.forEach(f))
  }

  // Combine two expressions into a choice, flattening operands that are
  // themselves choices
  static create(left, right) {
    const alternativesOf = expr => expr instanceof ChoiceMatch ? expr.matches : [expr]
    return new ChoiceMatch(alternativesOf(left).concat(alternativesOf(right)))
  }
}
185 | exports.ChoiceMatch = ChoiceMatch
186 | 
// Applies a repetition operator (`type`, one of "*", "+", or "?") to
// another match expression.
class RepeatMatch extends MatchExpr {
  constructor(match, type) {
    super()
    this.match = match
    this.type = type
  }

  // As simple as the expression being repeated
  get simple() {
    return this.match.simple
  }

  eq(other) {
    if (!(other instanceof RepeatMatch)) return false
    return this.match.eq(other.match) && this.type == other.type
  }

  get regexpPrec() {
    return 3
  }

  // The operator character doubles as the regexp suffix
  toRegexp() {
    const inner = toSubRegexp(this.match, this)
    return inner + this.type
  }

  // Simple repeats are output as a regexp, others as an [op, expr] array
  toExpr(getName) {
    if (this.simple) return super.toExpr()
    let op = OP_MAYBE
    if (this.type == "*") op = OP_STAR
    else if (this.type == "+") op = OP_PLUS
    return `[${op}, ${this.match.toExpr(getName)}]`
  }

  forEach(f) {
    f(this)
    this.match.forEach(f)
  }
}
211 | exports.RepeatMatch = RepeatMatch
212 | 
// A positive or negative lookahead. The thing looked for is either a
// match expression (`expr`) or, when `expr` is absent, whatever `start`
// refers to.
class LookaheadMatch extends MatchExpr {
  constructor(start, expr, positive) {
    super()
    this.start = start
    this.expr = expr
    this.positive = positive
  }

  get isNull() {
    return true
  }

  // Only lookaheads that have a match expression are simple
  get simple() {
    return !!this.expr
  }

  eq(other) {
    if (!(other instanceof LookaheadMatch)) return false
    if (!(other.start == this.start)) return false
    const sameExpr = this.expr ? other.expr && this.expr.eq(other.expr) : !other.expr
    return sameExpr && other.positive == this.positive
  }

  toRegexp() {
    // Not actually a regexp, but used for graph output
    if (!this.expr) return "LOOKAHEAD(" + this.start + ")"
    const marker = this.positive ? "=" : "!"
    return `(?${marker}${this.expr.toRegexp()})`
  }

  // Expression-based lookaheads are output as a regexp, the others as
  // an [op, name] array, with the name obtained through `getName`
  toExpr(getName) {
    if (this.expr) return super.toExpr()
    const op = this.positive ? OP_LOOKAHEAD : OP_NEG_LOOKAHEAD
    return `[${op}, ${getName(this.start.name)}]`
  }

  forEach(f) {
    f(this)
    if (this.expr) this.expr.forEach(f)
  }
}
245 | exports.LookaheadMatch = LookaheadMatch
246 | 
// Refers to a predicate by name. It is a null match and never simple.
class PredicateMatch extends MatchExpr {
  constructor(name) {
    super()
    this.name = name
  }

  get isNull() {
    return true
  }

  get simple() {
    return false
  }

  eq(other) {
    if (!(other instanceof PredicateMatch)) return false
    return other.name == this.name
  }

  // Not an actual regexp, just a readable label
  toRegexp() {
    const label = "PRED(" + this.name
    return label + ")"
  }

  // Output as an [OP_PREDICATE, name] array
  toExpr() {
    const quoted = JSON.stringify(this.name)
    return `[${OP_PREDICATE}, ${quoted}]`
  }
}
265 | exports.PredicateMatch = PredicateMatch
266 | 
// Compare two arrays of match expressions element by element, using the
// `eq` method of the elements of `a`.
let eqArray = exports.eqArray = function(a, b) {
  if (a.length != b.length) return false
  let index = 0
  while (index < a.length) {
    if (!a[index].eq(b[index])) return false
    index++
  }
  return true
}
272 | 


--------------------------------------------------------------------------------
/src/mode.js:
--------------------------------------------------------------------------------
  1 | var verbose = 0
  2 | 
// A node in the chain of contexts that a state is in. Each context
// records its name and token type, the stack depth it belongs to, its
// parent context, and the line text and position it was started at.
function Context(name, tokenType, depth, parent, line, pos) {
  this.name = name
  this.tokenType = tokenType
  this.depth = depth
  this.parent = parent
  this.startLine = line
  this.startPos = pos
}
 11 | 
 12 | var MAX_LOOKAHEAD_LINES = 3
 13 | 
 14 | function MatchContext() {
 15 |   this.stream = null
 16 |   this.line = this.startPos = 0
 17 |   this.string = this.startLine = ""
 18 |   this.copyInstance = null
 19 | }
 20 | 
 21 | MatchContext.prototype.start = function(stream) {
 22 |   this.stream = stream
 23 |   this.line = 0
 24 |   this.string = stream.string.slice(stream.start)
 25 |   this.startLine = stream.string
 26 |   this.startPos = stream.start
 27 |   return this
 28 | }
 29 | 
 30 | MatchContext.prototype.startLinebreak = function() {
 31 |   this.stream = null
 32 |   this.line = this.startPos = 0
 33 |   this.string = "\n"
 34 |   this.startLine = ""
 35 |   return this
 36 | }
 37 | 
 38 | MatchContext.prototype.copy = function() {
 39 |   var copy = this.copyInstance || (this.copyInstance = new MatchContext)
 40 |   copy.stream = this.stream
 41 |   copy.startPos = this.startPos
 42 |   copy.line = this.line
 43 |   copy.startLine = this.startLine
 44 |   copy.string = this.string
 45 |   return copy
 46 | }
 47 | 
 48 | MatchContext.prototype.updateStart = function() {
 49 |   this.startLine = !this.stream ? "" : this.line == 0 ? this.stream.string : this.stream.lookAhead(this.line)
 50 |   this.startPos = this.startLine.length - (this.string.length - 1)
 51 | }
 52 | 
 53 | MatchContext.prototype.ahead = function(n) {
 54 |   for (;;) {
 55 |     if (n <= this.string.length) return true
 56 |     if (this.string.charCodeAt(this.string.length - 1) !== 10) {
 57 |       this.string += "\n"
 58 |     } else if (this.line === MAX_LOOKAHEAD_LINES || !this.stream || !this.stream.lookAhead) {
 59 |       return false
 60 |     } else {
 61 |       var next = this.stream.lookAhead(this.line + 1)
 62 |       if (next == null) return false
 63 |       this.string += next + "\n"
 64 |       this.line++
 65 |     }
 66 |   }
 67 | }
 68 | 
// Module-level slot for the token type of the most recent match. It is
// reset by runMaybe, set by matchNext, and read by GrammarMode's token
// method.
var tokenValue = null
 70 | 
 71 | var stateClass = function(graph, options) {
  // A parse state: `stack` is an array of graph node ids, with the
  // current node on top, and `context` is the innermost Context (or
  // null).
  function StateClass(stack, context) {
    this.stack = stack
    this.context = context
  }
 76 | 
  // Try the edges going out of the node on top of the stack, in order,
  // updating the stack and context for the first one that works out.
  // Returns the position matched up to, or -1 when no edge matched (in
  // which case stack and context are restored).
  //
  // `maxSkip` is the number of times a failing final edge may be taken
  // anyway. `top` is true for the outermost call; nested calls pass
  // false so that they return as soon as their rule ends.
  StateClass.prototype.matchNext = function(mcx, pos, maxSkip, top) {
    var depth = this.stack.length - 1, node = this.stack[depth], edges = graph.nodes[node]

    // `edges` is a flat array in which each edge takes up a variable
    // number of slots, so `i` is also advanced inside the loop body
    for (var i = 0; i < edges.length; i++) {
      var op = edges[i], matched, to // See compileEdge in compile.js
      if (op === 0) { // Null match
        matched = pos
        to = edges[++i]
      } else if (op === 1 || op === 2) {   // 1, callTarget, returnTo
        var target = edges[++i]            // 2, callTarget, returnTo, context
        var returnTo = edges[++i]
        // Replace the current node with the return target, then push
        // the node that is being called
        this.go(returnTo)
        var oldContext = this.context
        if (op === 2) {
          var cx = edges[++i]
          this.context = new Context(cx.name, cx.token, this.stack.length, this.context, mcx.startLine, mcx.startPos)
        }
        this.stack.push(target)
        var inner = this.matchNext(mcx, pos, 0, false)
        // The call finished without consuming anything: continue
        // matching from the state that it left behind
        if (inner === pos) inner = this.matchNext(mcx, pos, i == edges.length - 1 ? maxSkip : 0, top)
        if (inner < 0) { // Reset state when the call fails
          this.stack.length = depth + 1
          this.stack[depth] = node
          this.context = oldContext
          continue
        }
        return inner
      } else if (op === 3) { // 3, tokenType, matchExpr, nextNode
        var token = edges[++i]
        matched = this.matchExpr(edges[++i], mcx, pos)
        to = edges[++i]
        if (matched > pos) tokenValue = token
      } else { // matchExpr, nextNode
        matched = this.matchExpr(op, mcx, pos)
        to = edges[++i]
      }

      if (matched < 0) {
        // Only the last edge of a node may be taken despite failing,
        // and only as long as there is skip budget left
        if (maxSkip > 0 && i == edges.length - 1) {
          if (verbose > 0) console["log"]("Dead end at", mcx.string.slice(pos), node, this.stack.join())
          maxSkip--
          matched = pos
        } else {
          continue
        }
      }
      this.go(to)
      // A target of -1 pops the node off the stack. Nested calls return
      // at that point, as does any call that emptied the stack.
      if (!top && to === -1 || this.stack.length === 0) return matched

      if (matched > pos) {
        if (verbose > 1)
          console["log"]("Token", JSON.stringify(mcx.string.slice(pos, matched)), "from", node, "to", to, "under", this.stack.join())
        return matched
      } else {
        // Nothing was consumed, so keep going from the new node, and
        // undo the step if that leads nowhere
        matched = this.matchNext(mcx, pos, i == edges.length - 1 ? maxSkip : 0, top)
        if (matched >= 0) return matched
        this.stack.length = depth + 1
        this.stack[depth] = node
      }
    }
    return -1
  }
139 | 
140 |   StateClass.prototype.go = function(to) {
141 |     this.stack.pop()
142 |     while (this.context && this.context.depth > this.stack.length)
143 |       this.context = this.context.parent
144 |     if (to !== -1) this.stack.push(to)
145 |   }
146 | 
  // Clear the recorded token type and run a top-level matchNext.
  // Returns the position matched up to, or -1 on failure.
  StateClass.prototype.runMaybe = function(mcx, pos, maxSkip) {
    tokenValue = null
    return this.matchNext(mcx, pos, maxSkip, true)
  }
151 | 
152 |   StateClass.prototype.forward = function(mcx, pos) {
153 |     var progress = this.runMaybe(mcx, pos, 2)
154 |     if (progress < 0) {
155 |       if (verbose > 0) console["log"]("Lost it at", mcx.string.slice(pos), this.stack.join())
156 |       this.stack.push(graph.token)
157 |       progress = this.runMaybe(mcx, pos, 0)
158 |     }
159 |     return progress
160 |   }
161 | 
  // Check whether the graph starting at node `start` matches the input
  // at `pos`, by running a fresh state on a copy of the match context
  // until that state's stack is empty (success) or a step fails. The
  // token type recorded before the lookahead is restored either way.
  StateClass.prototype.lookahead = function(mcx, pos, start) {
    var oldTokenValue = tokenValue
    var state = new this.constructor([start], null)
    // Work on the context's reusable copy, so that updateStart doesn't
    // touch the start information of the original
    mcx = mcx.copy()
    for (;;) {
      mcx.updateStart()
      // FIXME implement custom scanning algorithm. This one breaks when a sub-match fails
      var newPos = state.runMaybe(mcx, pos, 0)
      if (newPos < 0) { tokenValue = oldTokenValue; return false }
      if (state.stack.length === 0) { tokenValue = oldTokenValue; return true }
      pos = newPos
    }
  }
175 | 
176 |   StateClass.prototype.matchExpr = function(expr, mcx, pos) {
177 |     if (typeof expr === "string") {
178 |       var end = pos + expr.length
179 |       return mcx.ahead(end) && mcx.string.slice(pos, end) === expr ? end : -1
180 |     }
181 |     if (expr.exec) {
182 |       var m = mcx.ahead(pos + 1) && expr.exec(pos > 0 ? mcx.string.slice(pos) : mcx.string)
183 |       if (!m) return -1
184 |       return pos + m[0].length
185 |     }
186 | 
187 |     var op = expr[0]
188 |     if (op === 0) { // OP_SEQ, ...rest
189 |       for (var i = 1; i < expr.length; i++) {
190 |         pos = this.matchExpr(expr[i], mcx, pos)
191 |         if (pos < 0) return -1
192 |       }
193 |       return pos
194 |     } else if (op === 1) { // OP_CHOICE, ...rest
195 |       for (var i = 1, e = expr.length - 1;; i++) {
196 |         var cur = this.matchExpr(expr[i], mcx, pos)
197 |         if (i === e || cur > -1) return cur
198 |       }
199 |       return -1
200 |     } else if (op === 2 || op === 3) { // OP_STAR/OP_PLUS, expr
201 |       if (op === 3 && (pos = this.matchExpr(expr[1], mcx, pos)) < 0) return -1
202 |       for (;;) {
203 |         var inner = this.matchExpr(expr[1], mcx, pos)
204 |         if (inner == -1) return pos
205 |         pos = inner
206 |       }
207 |     } else if (op === 4) { // OP_MAYBE, expr
208 |       return Math.max(this.matchExpr(expr[1], mcx, pos), pos)
209 |     } else if (op === 5) { // OP_LOOKAHEAD, expr
210 |       return this.lookahead(mcx, pos, expr[1]) ? pos : -1
211 |     } else if (op === 6) { // OP_NEG_LOOKAHEAD, expr
212 |       return this.lookahead(mcx, pos, expr[1]) ? -1 : pos
213 |     } else if (op === 7) { // OP_PREDICATE, name
214 |       var lineStart = pos ? mcx.string.lastIndexOf("\n", pos - 1) : -1, line, linePos
215 |       if (mcx.stream && lineStart < 0) {
216 |         line = mcx.stream.string
217 |         linePos = pos + mcx.stream.start
218 |       } else {
219 |         var lineEnd = mcx.string.indexOf("\n", pos)
220 |         line = mcx.string.slice(lineStart + 1, lineEnd < 0 ? mcx.string.length : lineEnd)
221 |         linePos = pos - (lineStart + 1)
222 |       }
223 |       return options.predicates[expr[1]](line, linePos, this.context, mcx.stream ? nextLines(mcx.stream) : noNextLines) ? pos : -1
224 |     } else {
225 |       throw new Error("Unknown match type " + expr)
226 |     }
227 |   }
228 | 
229 |   function noNextLines() { return null }
230 | 
231 |   function nextLines(stream) { return function(n) { return stream.lookAhead(n) } }
232 | 
  // Find the context at position `linePos` in the text `line`, by
  // running a copy of this state over the line token by token. The
  // state itself is not changed.
  StateClass.prototype.contextAt = function(line, linePos) {
    var copy = this.copy(), mcx = new MatchContext, pos = 0, lastCx = this.context
    mcx.string = line + "\n"
    mcx.startLine = line
    for (;;) {
      var matched = copy.runMaybe(mcx, pos, 0)
      if (matched == -1) return copy.context
      if (matched > linePos) {
        var context = copy.context
        // When the token starts exactly at the requested position, only
        // report a context that was already in the chain before this
        // token, by walking up until such a context is found
        if (pos == linePos) {
          trim: while (context) {
            for (var prev = lastCx; prev; prev = prev.parent) if (prev === context) break trim
            context = context.parent
          }
        }
        return context
      }
      pos = matched
      lastCx = copy.context
    }
  }
254 | 
  // Create a state with a copy of this state's stack. The context chain
  // is shared with the original rather than copied.
  StateClass.prototype.copy = function() {
    return new this.constructor(this.stack.slice(), this.context)
  }
258 | 
  // Create the initial state, positioned at the graph's start node with
  // no context
  StateClass.start = function() {
    return new this([graph.start], null)
  }
262 | 
263 |   return StateClass
264 | }
265 | 
266 | // declare global: CodeMirror
// A mode object built around a compiled grammar graph. `options` may
// hold a `predicates` object with the predicate functions that the
// grammar refers to. The startState/copyState/token/blankLine methods
// below form the mode interface (presumably the one CodeMirror expects
// of mode objects — see its mode documentation).
function GrammarMode(graph, options) {
  this.State = stateClass(graph, options || {})
  // A single match context is reused for every token
  this.mcx = new MatchContext
}
CodeMirror.GrammarMode = GrammarMode

GrammarMode.prototype.startState = function() { return this.State.start() }

GrammarMode.prototype.copyState = function(state) { return state.copy() }

// Advance the stream over the next token and return its style. The
// token types of the enclosing contexts are put in front of the token's
// own type, outermost first, separated by spaces.
GrammarMode.prototype.token = function(stream, state) {
  stream.pos += state.forward(this.mcx.start(stream), 0)
  var tokenType = tokenValue
  for (var cx = state.context; cx; cx = cx.parent)
    if (cx.tokenType) tokenType = cx.tokenType + (tokenType ? " " + tokenType : "")
  // At the end of the line, run the state one more step from the
  // position right after the token
  if (stream.eol())
    state.forward(this.mcx, stream.pos - stream.start)
  return tokenType
}

// Advance the state over an empty line by matching a line break
GrammarMode.prototype.blankLine = function(state) {
  state.forward(this.mcx.startLinebreak(), 0)
}
290 | 


--------------------------------------------------------------------------------
/src/parse.js:
--------------------------------------------------------------------------------
// Parse the text of a grammar file into a GrammarDeclaration node.
// `fileName` is only used in error messages.
module.exports = function(file, fileName) {
  // Note that parseGrammar declares a single parameter, so the second
  // argument passed here is unused
  return parseGrammar(new Input(file, fileName), 0)
}
  4 | 
  5 | class Node {
  6 |   constructor(type, start, props, end) {
  7 |     this.type = type
  8 |     this.start = start
  9 |     this.end = end
 10 |     if (props) for (let prop in props) this[prop] = props[prop]
 11 |   }
 12 | }
 13 | 
// Characters that may appear in an identifier (`_` is also part of \w)
const wordChar = /[\w_$]/
 15 | 
 16 | class Input {
 17 |   constructor(string, fileName) {
 18 |     this.string = string
 19 |     this.fileName = fileName
 20 |     this.type = "sof"
 21 |     this.value = null
 22 |     this.start = this.end = this.lastEnd = 0
 23 |     this.next()
 24 |   }
 25 | 
 26 |   lineInfo(pos) {
 27 |     for (let line = 1, cur = 0;;) {
 28 |       let next = this.string.indexOf("\n", cur)
 29 |       if (next > -1 && next < pos) {
 30 |         ++line
 31 |         cur = next + 1
 32 |       } else {
 33 |         return {line, ch: pos - cur, fileName: this.fileName}
 34 |       }
 35 |     }
 36 |   }
 37 | 
 38 |   raise(msg, pos) {
 39 |     let info = this.lineInfo(pos)
 40 |     throw new SyntaxError(`${msg} (${info.fileName ? info.fileName + " " : ""}${info.line}:${info.ch})`)
 41 |   }
 42 | 
 43 |   match(pos, re) {
 44 |     let match = re.exec(this.string.slice(pos))
 45 |     return match ? pos + match[0].length : -1
 46 |   }
 47 | 
 48 |   next() {
 49 |     this.lastEnd = this.end
 50 |     let start = this.match(this.end, /^(\s|\/\/.*|\/\*[^]*?\*\/)*/)
 51 |     if (start == this.string.length) return this.set("eof", null, start, start)
 52 | 
 53 |     let next = this.string[start]
 54 |     if (next == '"') {
 55 |       let end = this.match(start + 1, /^(\\.|[^"])*"/)
 56 |       if (end == -1) this.raise("Unterminated string literal", start)
 57 |       return this.set("string", JSON.parse(this.string.slice(start, end)), start, end)
 58 |     } else if (/[()|&~!\-+*?{}\.,=]/.test(next)) {
 59 |       return this.set(next, null, start, start + 1)
 60 |     } else if (wordChar.test(next)) {
 61 |       let end = start + 1
 62 |       while (end < this.string.length && wordChar.test(this.string[end])) end++
 63 |       return this.set("id", this.string.slice(start, end), start, end)
 64 |     } else {
 65 |       this.raise("Unexpected character " + JSON.stringify(next), start)
 66 |     }
 67 |   }
 68 | 
 69 |   set(type, value, start, end) {
 70 |     this.type = type
 71 |     this.value = value
 72 |     this.start = start
 73 |     this.end = end
 74 |   }
 75 | 
 76 |   startNode(type, props) {
 77 |     return new Node(type, this.start, props)
 78 |   }
 79 | 
 80 |   finishNode(node, type) {
 81 |     if (type != null) node.type = type
 82 |     node.end = this.lastEnd
 83 |     return node
 84 |   }
 85 | 
 86 |   eat(type, value) {
 87 |     if (this.type == type && (value == null || this.value === value)) {
 88 |       this.next()
 89 |       return true
 90 |     } else {
 91 |       return false
 92 |     }
 93 |   }
 94 | 
 95 |   unexpected() {
 96 |     this.raise(`Unexpected token '${this.string.slice(this.start, this.end)}'`, this.start)
 97 |   }
 98 | }
 99 | 
// Parse a whole grammar: any number of `extends "file"` and
// `include "file" as name` declarations, followed by rule declarations,
// which may be grouped in `skip expr { ... }` and `tokens { ... }`
// blocks. Returns a GrammarDeclaration node.
function parseGrammar(input) {
  const grammar = input.startNode("GrammarDeclaration", {
    rules: Object.create(null),
    extends: null,
    included: []
  })

  // Header declarations
  let inHeader = true
  while (inHeader) {
    const declStart = input.start
    if (input.eat("id", "extends")) {
      if (grammar.extends) input.raise("Can't extend multiple grammars", declStart)
      if (input.type != "string") input.unexpected()
      grammar.extends = input.value
      input.next()
    } else if (input.eat("id", "include")) {
      const include = new Node("IncludeDeclaration", declStart)
      if (input.type != "string") input.unexpected()
      include.value = input.value
      input.next()
      if (!input.eat("id", "as")) input.unexpected()
      include.id = parseIdent(input)
      grammar.included.push(input.finishNode(include))
    } else {
      inHeader = false
    }
  }

  // Rules and rule blocks
  while (input.type != "eof") {
    let grouped = true, isToken = false, skipExpr = null
    if (input.eat("id", "skip")) skipExpr = parseExprChoice(input)
    else if (input.eat("id", "tokens")) isToken = true
    else grouped = false

    if (!grouped) {
      parseRule(input, grammar.rules, false, null)
      continue
    }
    if (!input.eat("{")) input.unexpected()
    while (!input.eat("}"))
      parseRule(input, grammar.rules, isToken, skipExpr)
  }
  return input.finishNode(grammar)
}
143 | 
// Parse a single rule declaration of the form
//
//   [context] [start] name[(param, ...)] [= "tokenType"] { expr }
//
// and store it in `rules` under its name. `isToken` and `skip` come
// from the block that the rule appears in. Raises a syntax error for
// duplicate rule names and for token rules that take parameters.
function parseRule(input, rules, isToken, skip) {
  // The property values below are evaluated in order, before startNode
  // is called, so they consume the optional `context` and `start`
  // keywords and the rule name. Also note that the `start` flag
  // overwrites the node's start position, since Node copies properties
  // after setting its own fields.
  let node = input.startNode("RuleDeclaration", {
    isToken,
    // FIXME Storing the same sub-ast in multiple nodes is a rather
    // weird way to build an AST
    skip,
    context: input.eat("id", "context"),
    start: input.eat("id", "start"),
    id: parseIdent(input),
    tokenType: null,
    params: []
  })
  if (node.id.name in rules)
    input.raise(`Duplicate rule declaration '${node.id.name}'`, node.id.start)
  rules[node.id.name] = node

  if (input.eat("(")) while (!input.eat(")")) {
    if (node.params.length && !input.eat(",")) input.unexpected()
    node.params.push(parseIdent(input))
  }
  if (isToken && node.params.length > 0)
    input.raise("Token rules must not take parameters", node.params[0].start)
  // A rule that is given a token type is also made a context rule
  if (input.eat("=")) {
    if (input.type != "string") input.unexpected()
    node.tokenType = input.value
    input.next()
    node.context = true
  }
  if (!input.eat("{")) input.unexpected()
  node.expr = parseExprChoice(input)
  if (!input.eat("}")) input.unexpected()
  return input.finishNode(node)
}
177 | 
// Parse a primary expression: a parenthesized expression, a string or
// character range, `super`, a `&predicate`, `_`, `.`, or a (possibly
// dotted) rule name with optional arguments.
function parseExprInner(input) {
  if (input.eat("(")) {
    const grouped = parseExprChoice(input)
    if (!input.eat(")")) input.unexpected()
    return grouped
  }

  const node = input.startNode()

  if (input.type == "string") {
    const text = input.value
    input.next()
    // Only a one-character string followed by a dash starts a range
    if (text.length != 1 || !input.eat("-")) {
      if (text.length == 0) input.raise("Empty strings are not valid in grammars", node.start)
      node.value = text
      return input.finishNode(node, "StringMatch")
    }
    if (input.type != "string" || input.value.length != 1) input.unexpected()
    node.from = text
    node.to = input.value
    input.next()
    return input.finishNode(node, "CharacterRange")
  }

  if (input.eat("id", "super")) return input.finishNode(node, "SuperMatch")

  if (input.eat("&")) {
    node.id = parseIdent(input)
    return input.finishNode(node, "PredicateMatch")
  }

  if (input.eat("id", "_")) return input.finishNode(node, "AnyMatch")

  if (input.eat(".")) return input.finishNode(node, "DotMatch")

  node.id = parseDottedIdent(input)
  node.arguments = []
  // An argument list must start directly after the name
  if (input.start == node.id.end && input.eat("(")) {
    while (!input.eat(")")) {
      if (node.arguments.length && !input.eat(",")) input.unexpected()
      node.arguments.push(parseExprChoice(input))
    }
  }
  return input.finishNode(node, "RuleIdentifier")
}
219 | 
// Parses a primary expression optionally followed by a single repetition
// suffix ("*", "?" or "+"). With a suffix, the result is a RepeatedMatch
// node that starts where the primary expression started, ends at the end
// of the suffix token, and records the suffix as `kind`. Without one, the
// primary expression is returned unchanged.
function parseExprSuffix(input) {
  let from = input.start, inner = parseExprInner(input)
  let kind = input.type
  if (kind != "*" && kind != "?" && kind != "+") return inner

  let repeated = new Node("RepeatedMatch", from, {expr: inner, kind}, input.end)
  input.next()
  return repeated
}
233 | 
// Parses an expression with an optional "!" or "~" prefix. When the prefix
// is present the result is a LookaheadMatch node whose `kind` is the prefix
// token type and whose `expr` is the suffixed expression that follows;
// otherwise the suffixed expression is returned directly.
function parseExprLookahead(input) {
  let kind = input.type
  if (kind != "!" && kind != "~") return parseExprSuffix(input)

  let lookahead = input.startNode("LookaheadMatch", {kind})
  input.next()
  lookahead.expr = parseExprSuffix(input)
  return input.finishNode(lookahead)
}
244 | 
// Reports whether the current token type is one of the tokens that end a
// sequence of expressions: "}", ")", "|", "{" or ",".
function endOfSequence(input) {
  let current = input.type
  for (let terminator of ["}", ")", "|", "{", ","]) {
    if (current == terminator) return true
  }
  return false
}
248 | 
// Parses one or more lookahead-level expressions up to a sequence
// terminator (see endOfSequence). A lone expression is returned as-is;
// two or more are collected, in order, into the `exprs` of a
// SequenceMatch node.
function parseExprSequence(input) {
  let from = input.start, head = parseExprLookahead(input)
  if (endOfSequence(input)) return head

  let sequence = new Node("SequenceMatch", from, {exprs: [head]})
  // At least one more element follows, since the terminator check above failed.
  sequence.exprs.push(parseExprLookahead(input))
  while (!endOfSequence(input)) sequence.exprs.push(parseExprLookahead(input))
  return input.finishNode(sequence)
}
257 | 
// Parses sequences separated by "|". A single sequence is returned as-is;
// when at least one "|" is present, all alternatives are collected, in
// order, into the `exprs` of a ChoiceMatch node.
function parseExprChoice(input) {
  let from = input.start, firstAlternative = parseExprSequence(input)
  if (!input.eat("|")) return firstAlternative

  let choice = new Node("ChoiceMatch", from, {exprs: [firstAlternative]})
  // The "|" consumed above guarantees one more alternative.
  choice.exprs.push(parseExprSequence(input))
  while (input.eat("|")) choice.exprs.push(parseExprSequence(input))
  return input.finishNode(choice)
}
266 | 
// Parses a single "id" token into an Identifier node whose `name` is the
// token's value. Calls input.unexpected() when the current token is not
// an identifier.
function parseIdent(input) {
  let isIdentifier = input.type == "id"
  if (!isIdentifier) input.unexpected()

  let name = input.value
  let ident = input.startNode("Identifier", {name})
  input.next()
  return input.finishNode(ident)
}
273 | 
// Parses an identifier optionally extended with ".name" parts, producing
// a single Identifier node whose `name` joins all parts with dots
// (e.g. "a.b.c"). Calls input.unexpected() when the current token, or the
// token after a consumed dot, is not an identifier.
function parseDottedIdent(input) {
  if (input.type != "id") input.unexpected()
  let dotted = input.startNode("Identifier", {name: input.value})
  input.next()

  for (;;) {
    // A "." only extends the name when input.start equals input.lastEnd
    // (presumably: the dot directly follows the previous token, with no
    // whitespace in between).
    let adjacent = input.start == input.lastEnd
    if (!adjacent || !input.eat(".")) break
    if (input.type != "id") input.unexpected()
    dotted.name += "." + input.value
    input.next()
  }
  return input.finishNode(dotted)
}
285 | 


--------------------------------------------------------------------------------