├── .gitignore ├── LICENSE ├── README.md ├── package.json └── src ├── ast.js ├── compile.js ├── grammar-mode.js ├── graph.js ├── matchexpr.js ├── mode.js └── parse.js /.gitignore: -------------------------------------------------------------------------------- 1 | .tern-port 2 | /node_modules 3 | /dist 4 | /src/scratch -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2017 by Marijn Haverbeke and others 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CodeMirror grammar mode 2 | 3 | This is an experimental tool for building CodeMirror modes from 4 | grammar descriptions. 
5 | 6 | You write a grammar like this: 7 | 8 | skip (" " | "\t" | "\n")* { 9 | Expr { (num | var | ParenExpr) (op Expr)? } 10 | context ParenExpr { "(" Expr ")" } 11 | } 12 | tokens { 13 | num="number" { digit+ } 14 | var="variable" { letter (letter | digit)* } 15 | op { "+" | "-" | "/" | "*" } 16 | } 17 | digit { "0"-"9" } 18 | letter { "a"-"z" | "A"-"Z" } 19 | 20 | And then run `grammar-mode` on it to convert it into a JavaScript 21 | file. This file will export a set of bindings that can be given to the 22 | accompanying interpreter (in `src/mode.js`) to create a CodeMirror 23 | mode. 24 | 25 | ## Grammar syntax 26 | 27 | A grammar is a set of rules. Rules may appear on the top level or 28 | within `tokens` or `skip` blocks. The rules within `tokens` are 29 | considered the base token types of the language, and will be fallen 30 | back on when nothing else matches. A `skip` block is used to 31 | automatically insert whitespace-like productions between the elements 32 | of the rules inside of it. 33 | 34 | Each rule has a name, optionally preceded by the keyword `context` to 35 | mark it as a rule for which a context has to be pushed onto the 36 | context stack. Contexts can be used by external code to do things like 37 | computing indentation based on what rules are currently active. 38 | 39 | After the rule name, you can add an equals sign and a quoted string to 40 | set a token type for the rule (for example `num="number"` in the 41 | example). That token type will be used to highlight the text that 42 | matches the rule. 43 | 44 | Each rule contains a match expression, which is built up like this: 45 | 46 | - A `"literal string"` (using JSON string syntax) matches that exact 47 | text. 48 | 49 | - An underscore matches any character, and a period matches any 50 | character except newlines. 51 | 52 | - A character range is written as two single-character strings 53 | with a dash in between. 54 | 55 | - An unquoted word is a reference to another rule. 
56 | 57 | - Multiple expressions separated by whitespace indicate that these 58 | things must match in sequence. 59 | 60 | - Parentheses can be used around expressions to group them. 61 | 62 | - Multiple expressions separated by pipe characters indicate a choice 63 | between those expressions. The first choice that matches is taken. 64 | 65 | - A `+`, `*`, or `?` after an expression allows that expression to 66 | occur one or more (`+`), zero or more (`*`), or zero or one (`?`) 67 | times. This is done greedily — as many repetitions as possible are 68 | matched. 69 | 70 | - A `~` or `!` character followed by an expression denotes a 71 | lookahead — positive lookahead for `~` and negative for `!`. 72 | 73 | - An `&` followed by a name is a call to a predicate. This is an 74 | external function that will be called to determine whether a given 75 | position matches. 76 | 77 | ## Single-edge lookahead 78 | 79 | A grammar is compiled to a set of state machines, whose edges are 80 | regular expressions, possibly extended with predicate calls and 81 | lookaheads, or calls to rules. When parsing, the interpreter will take 82 | the first edge that matches and consumes input, without looking ahead 83 | further. 84 | 85 | The catch is that you have to somehow write your grammar so that the 86 | right choice is made at every point. If something is ambiguous, the 87 | parser will just always take the first path. So, depending on your 88 | grammar, you might have to insert lookaheads to disambiguate things. 89 | For example, to distinguish between a variable and a label in a C-like 90 | language, you'd need rules something like this: 91 | 92 | Statement { 93 | label ":" | 94 | variable | 95 | otherThing 96 | } 97 | 98 | label="meta" { letter+ ~(spaceChar* ":") } 99 | variable="variable" { letter+ } 100 | 101 | ## Command-line parameters 102 | 103 | The `grammar-mode` command expects a file as argument, or will read 104 | from standard input when not given one. 
Other, optional, arguments 105 | include: 106 | 107 | * `--output file` specifies a file to write the output to (defaults 108 | to standard output). 109 | 110 | * `--es-module` tells the tool to output an ES6 module (default is a 111 | CommonJS module). 112 | 113 | * `--graph` will cause it to output a graph in .dot format instead of 114 | a JavaScript module. Can be useful for debugging. 115 | 116 | * `--names` will cause the JavaScript output to be more verbose but 117 | easier to read, using string names rather than numbers for the 118 | nodes. 119 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "codemirror-grammar-mode", 3 | "version": "0.1.10", 4 | "description": "Experimental approach to writing CodeMirror modes", 5 | "bin": { 6 | "grammar-mode": "./src/grammar-mode.js" 7 | }, 8 | "main": "src/mode.js", 9 | "scripts": { 10 | "test": "mocha test/test-*.js" 11 | }, 12 | "repository": { 13 | "type": "git", 14 | "url": "https://github.com/codemirror/grammar-mode/" 15 | }, 16 | "keywords": [ 17 | "syntax", 18 | "highlighting", 19 | "editor", 20 | "codemirror", 21 | "mode", 22 | "grammar", 23 | "parser" 24 | ], 25 | "author": "Marijn Haverbeke ", 26 | "license": "MIT", 27 | "devDependencies": { 28 | "codemirror": "^5.25.2" 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/ast.js: -------------------------------------------------------------------------------- 1 | function build(type, from, props) { 2 | props.type = type 3 | props.start = from.start 4 | props.end = from.end 5 | return props 6 | } 7 | 8 | function noSkipAfter(node) { 9 | let t = node.type 10 | return t == "LookaheadMatch" || t == "PredicateMatch" || t == "Label" || 11 | t == "RepeatedMatch" && node.kind != "?" 
12 | } 13 | 14 | // Replaces super matches, inserts skip matches in the appropriate 15 | // places, splits string matches with newlines, and collapses nested 16 | // sequence/choice expressions, so that further passes don't have to 17 | // worry about those. 18 | let normalizeExpr = exports.normalizeExpr = function(expr, ruleName, superGrammar, skip, prefix) { 19 | if (expr.type == "StringMatch" && expr.value.length > 1 && expr.value.indexOf("\n") > -1) { 20 | let exprs = [] 21 | expr.value.split(/\n/).forEach((part, i) => { 22 | if (i) exprs.push(build("StringMatch", expr, {value: "\n"})) 23 | if (part.length) exprs.push(build("StringMatch", expr, {value: part})) 24 | }) 25 | return build("SequenceMatch", expr, {exprs}) 26 | } else if (expr.type == "RuleIdentifier") { 27 | for (let i = 0; i < expr.arguments.length; i++) 28 | expr.arguments[i] = normalizeExpr(expr.arguments[i], ruleName, superGrammar, skip, prefix) 29 | if (prefix) expr.id.name = prefix + expr.id.name 30 | } else if (expr.type == "RepeatedMatch") { 31 | let inner = normalizeExpr(expr.expr, ruleName, superGrammar, skip, prefix) 32 | if (skip && expr.kind != "?") inner = build("SequenceMatch", inner, {exprs: [inner, skip]}) 33 | expr.expr = inner 34 | } else if (expr.type == "LookaheadMatch") { 35 | expr.expr = normalizeExpr(expr.expr, ruleName, null, skip, prefix) 36 | } else if (expr.type == "SequenceMatch") { 37 | let exprs = [] 38 | for (let i = 0; i < expr.exprs.length; i++) { 39 | let next = normalizeExpr(expr.exprs[i], ruleName, superGrammar, skip, prefix) 40 | if (next.type == "SequenceMatch") exprs = exprs.concat(next.exprs) 41 | else exprs.push(next) 42 | if (skip && i < expr.exprs.length - 1 && !noSkipAfter(next)) 43 | exprs.push(skip) 44 | } 45 | expr.exprs = exprs 46 | } else if (expr.type == "ChoiceMatch") { 47 | let exprs = [] 48 | for (let i = 0; i < expr.exprs.length; i++) { 49 | let next = normalizeExpr(expr.exprs[i], ruleName, superGrammar, skip, prefix) 50 | if (next.type == 
"ChoiceMatch") exprs = exprs.concat(next.exprs) 51 | else exprs.push(next) 52 | } 53 | expr.exprs = exprs 54 | } else if (expr.type == "SuperMatch") { 55 | for (let grammar = superGrammar; grammar; grammar = grammar.super) { 56 | let rule = grammar.rules[ruleName] 57 | if (rule) return normalizeExpr(rule.expr, ruleName, grammar.super, skip, prefix) 58 | } 59 | throw new SyntaxError(`No super rule found for '${ruleName}'`) 60 | } 61 | return expr 62 | } 63 | 64 | let eqExpr = exports.eqExpr = function(a, b) { 65 | if (a.type != b.type) return false 66 | if (a.type == "StringMatch") return a.value == b.value 67 | if (a.type == "CharacterRange") return a.from == b.from && a.to == b.to 68 | if (a.type == "AnyMatch" || a.type == "DotMatch") return true 69 | if (a.type == "RuleIdentifier") return a.id.name == b.id.name && eqExprs(a.arguments, b.arguments) 70 | if (a.type == "RepeatedMatch" || a.type == "LookaheadMatch") return a.kind == b.kind && eqExpr(a.expr, b.expr) 71 | if (a.type == "SequenceMatch" || a.type == "ChoiceMatch") return eqExprs(a.exprs, b.exprs) 72 | if (a.type == "PredicateMatch") return a.id.name == b.id.name 73 | throw new Error("Missed case in eqExpr: " + a.type) 74 | } 75 | 76 | let eqExprs = exports.eqExprs = function(a, b) { 77 | if (a.length != b.length) return false 78 | for (let i = 0; i < a.length; i++) if (!eqExpr(a[i], b[i])) return false 79 | return true 80 | } 81 | 82 | function instantiateArray(params, args, exprs) { 83 | let updated = null 84 | for (let i = 0; i < exprs.length; i++) { 85 | let cur = exprs[i], inst = instantiateArgs(params, args, cur) 86 | if (cur != inst && !updated) updated = exprs.slice(0, i) 87 | if (updated) updated.push(inst) 88 | } 89 | return updated || exprs 90 | } 91 | 92 | let instantiateArgs = exports.instantiateArgs = function(params, args, expr) { 93 | if (expr.type == "RuleIdentifier") { 94 | let pos = params.indexOf(expr.id.name) 95 | if (pos > -1) { 96 | if (expr.arguments.length) throw new 
Error("Arguments to params not supported yet") 97 | return args[pos] 98 | } 99 | let newArgs = instantiateArray(params, args, expr.arguments) 100 | return newArgs == expr.arguments ? expr : build(expr.type, expr, {id: expr.id, arguments: newArgs}) 101 | } else if (expr.type == "RepeatedMatch" || expr.type == "LookaheadMatch") { 102 | let inst = instantiateArgs(params, args, expr.expr) 103 | return inst != expr.expr ? build(expr.type, expr, {expr: inst, kind: expr.kind}) : expr 104 | } else if (expr.type == "SequenceMatch" || expr.type == "ChoiceMatch") { 105 | let updated = instantiateArray(params, args, expr.exprs) 106 | return updated != expr.exprs ? build(expr.type, expr, {exprs: updated}) : expr 107 | } else { 108 | return expr 109 | } 110 | } 111 | 112 | function forEachExpr(expr, f) { 113 | if (f(expr) === false) return 114 | if (expr.type == "RepeatedMatch" || expr.type == "LookaheadMatch") 115 | forEachExpr(expr.expr, f) 116 | else if (expr.type == "SequenceMatch" || expr.type == "ChoiceMatch") 117 | for (let i = 0; i < expr.exprs.length; i++) forEachExpr(expr.exprs[i], f) 118 | else if (expr.type == "RuleIdentifier") 119 | for (let i = 0; i < expr.arguments.length; i++) forEachExpr(expr.arguments[i], f) 120 | } 121 | exports.forEachExpr = forEachExpr 122 | -------------------------------------------------------------------------------- /src/compile.js: -------------------------------------------------------------------------------- 1 | const {Call, Token} = require("./graph") 2 | 3 | function buildEdgeInfo(graphs, getName, options) { 4 | let edgeList = [], matchN = 0 5 | 6 | for (let name in graphs) { 7 | let graph = graphs[name] 8 | for (let node = 0; node < graph.nodes.length; node++) { 9 | let edges = graph.nodes[node], nodeName = getName(name, node) 10 | for (let i = 0; i < edges.length; i++) { 11 | let {match, effect, to} = edges[i], matchStr = match.toExpr(getName) 12 | let useMatch = -1 13 | if (matchStr.length > 8 && !options.names) for (let j = 0; 
j < edgeList.length; j++) { 14 | let other = edgeList[j] 15 | if (other.match == matchStr) { 16 | useMatch = other.useMatch == -1 ? other.useMatch = matchN++ : other.useMatch 17 | break 18 | } 19 | } 20 | edgeList.push({ 21 | from: nodeName, 22 | to, 23 | match: useMatch == -1 ? matchStr : null, 24 | useMatch, 25 | effect, 26 | graph: name 27 | }) 28 | } 29 | } 30 | } 31 | return edgeList 32 | } 33 | 34 | // An edge can be one of the following: 35 | // 0, nextNode null edge 36 | // 1, callTarget, returnTo regular call 37 | // 2, callTarget, returnTo, context context call 38 | // 3, tokenType, matchExpr, nextNode token edge 39 | // matchExpr, nextNode regular match edge 40 | function compileEdge(edgeInfo, getName) { 41 | let to = edgeInfo.to == null ? -1 : getName(edgeInfo.graph, edgeInfo.to) 42 | if (edgeInfo.effect instanceof Call) { 43 | let {target, context} = edgeInfo.effect 44 | if (!context) return `1, ${getName(target.name)}, ${to}` 45 | return `2, ${getName(target.name)}, ${to}, ${JSON.stringify(context)}` 46 | } 47 | let match = edgeInfo.useMatch != -1 ? `e[${edgeInfo.useMatch}]` : edgeInfo.match 48 | if (edgeInfo.effect instanceof Token) 49 | return `3, ${JSON.stringify(edgeInfo.effect.type)}, ${match}, ${to}` 50 | if (match == "null") 51 | return `0, ${to}` 52 | return `${match}, ${to}` 53 | } 54 | 55 | function buildNamer(graphs, options) { 56 | if (options.names) { 57 | return (graphName, node) => JSON.stringify(graphName + (node ? 
"$" + node : "")) 58 | } else { 59 | let offsets = {}, offset = 0 60 | for (let name in graphs) { 61 | offsets[name] = offset 62 | offset += graphs[name].nodes.length 63 | } 64 | return (graphName, node) => offsets[graphName] + (node || 0) 65 | } 66 | } 67 | 68 | module.exports = function(graphs, options = {}) { 69 | let getName = buildNamer(graphs, options) 70 | let edgeInfo = buildEdgeInfo(graphs, getName, options) 71 | 72 | let exprVector = [] 73 | for (let i = 0; i < edgeInfo.length; i++) { 74 | let info = edgeInfo[i] 75 | if (info.useMatch > -1 && info.match) exprVector[info.useMatch] = info.match 76 | } 77 | 78 | let code = "", exp = options.esModule ? "export var " : "exports." 79 | if (exprVector.length) code += `var e = [${exprVector.join(", ")}]\n` 80 | let edges = [], nodes = [] 81 | for (let curNode = edgeInfo[0].from, i = 0;; i++) { 82 | let info = edgeInfo[i] 83 | if (!info || info.from != curNode) { 84 | if (options.names) nodes.push(`${curNode}: [\n ${edges.join(",\n ")}\n ]`) 85 | else nodes.push(`[${edges.join(",\n ")}]`) 86 | if (!info) break 87 | curNode = info.from 88 | edges.length = 0 89 | } 90 | edges.push(compileEdge(info, getName)) 91 | } 92 | code += `${exp}nodes = ${options.names ? "{" : "["}\n ${nodes.join(",\n ")}\n${options.names ? 
"}" : "]"}\n` 93 | code += `${exp}start = ${getName("_start")}\n` 94 | if (options.token !== false) 95 | code += `${exp}token = ${getName("_token")}\n` 96 | 97 | return code 98 | } 99 | -------------------------------------------------------------------------------- /src/grammar-mode.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | const parse = require("./parse") 4 | const compile = require("./compile") 5 | const {buildGraph} = require("./graph") 6 | const path = require("path"), fs = require("fs") 7 | 8 | let input = null, outputGraph = false, names = false, token = true, esModule = false, output = null 9 | 10 | // declare global: process 11 | for (let i = 2; i < process.argv.length; i++) { 12 | let arg = process.argv[i] 13 | if (arg == "--graph") outputGraph = true 14 | else if (arg == "--no-token") token = false 15 | else if (arg == "--es-module") esModule = true 16 | else if (arg == "--names") names = true 17 | else if (arg == "--output") output = process.argv[++i] 18 | else if (arg == "--help") usage(0) 19 | else if (input || arg[0] == "-") usage(1) 20 | else input = arg 21 | } 22 | 23 | function usage(code) { 24 | ;(code ? 
process.stderr : process.stdout).write( 25 | "grammar-mode [file] [--output file] [--es-module] [--no-token] [--graph] [--names]\n" 26 | ) 27 | process.exit(code) 28 | } 29 | 30 | if (input) { 31 | out(run(parseWithSuper(path.dirname(input), fs.readFileSync(input, "utf8"), input))) 32 | } else { 33 | let buffer = "" 34 | process.stdin.resume() 35 | process.stdin.on("data", chunk => buffer += chunk.toString("utf8")) 36 | process.stdin.on("end", () => out(run(parseWithSuper(process.cwd(), buffer, null)))) 37 | } 38 | 39 | function parseWithSuper(base, input, fileName) { 40 | let ast = parse(input, fileName) 41 | if (ast.extends) { 42 | let file = path.resolve(base, ast.extends) 43 | ast.super = parseWithSuper(path.dirname(file), fs.readFileSync(file, "utf8"), file) 44 | } 45 | for (let i = 0; i < ast.included.length; i++) { 46 | let file = path.resolve(base, ast.included[i].value) 47 | ast.included[i].ast = parseWithSuper(path.dirname(file), fs.readFileSync(file, "utf8"), file) 48 | } 49 | return ast 50 | } 51 | 52 | function run(ast) { 53 | let options = {token, esModule, names} 54 | let graphs = buildGraph(ast, options) 55 | if (outputGraph) 56 | return `digraph{\n${Object.keys(graphs).map(k => graphs[k].toString()).join("")}}\n` 57 | else 58 | return compile(graphs, options) 59 | } 60 | 61 | function out(string) { 62 | if (output) fs.writeFileSync(output, string, "utf8") 63 | else process.stdout.write(string, "utf8") 64 | } 65 | -------------------------------------------------------------------------------- /src/graph.js: -------------------------------------------------------------------------------- 1 | const {nullMatch, anyMatch, dotMatch, StringMatch, RangeMatch, SeqMatch, 2 | ChoiceMatch, RepeatMatch, LookaheadMatch, PredicateMatch} = require("./matchexpr") 3 | const {normalizeExpr, eqExprs, instantiateArgs, forEachExpr} = require("./ast") 4 | 5 | exports.buildGraph = function(grammar, options) { 6 | let {rules, start, tokens} = gatherRules(grammar) 7 | 
countReferences(rules, start, tokens) 8 | let cx = new Context(rules, Object.create(null)) 9 | let startGraph = cx.registerGraph("_start", new SubGraph), after = startGraph.node() 10 | startGraph.copy(0, after, cx.evalCall(start, [])) 11 | startGraph.edge(after, 0, anyMatch) 12 | let startGraphs = ["_start"] 13 | 14 | if (options.token !== false) { 15 | let tokenGraph = cx.registerGraph("_token", new SubGraph) 16 | for (let i = 0; i < tokens.length; i++) 17 | tokenGraph.copy(0, null, cx.evalCall(tokens[i], [])) 18 | tokenGraph.edge(0, null, anyMatch) 19 | startGraphs.push("_token") 20 | } 21 | 22 | return gcGraphs(cx.graphs, startGraphs) 23 | } 24 | 25 | class Call { 26 | constructor(target, context) { this.target = target; this.context = context } 27 | toString() { return `CALL(${this.target.name})` } 28 | } 29 | exports.Call = Call 30 | class Token { 31 | constructor(type) { this.type = type } 32 | toString() { return `TOKEN(${this.type})` } 33 | } 34 | exports.Token = Token 35 | 36 | class Edge { 37 | constructor(to, match, effect) { 38 | this.to = to 39 | this.match = match 40 | this.effect = effect 41 | } 42 | 43 | toString(graph, from) { 44 | let result = `${graph}_${from} -> ${graph}_${this.to == null ? "RET" : this.to}`, label = this.match.toRegexp() 45 | if (this.effect) label = (label ? label + " " : "") + this.effect.toString() 46 | if (label) result += `[label=${JSON.stringify(label)}]` 47 | return result 48 | } 49 | 50 | canCombine(other) { 51 | if (this.effect instanceof Call || other.effect instanceof Call) return false 52 | let thisIsolated = this.match.isolated || !!this.effect, otherIsolated = other.match.isolated || !!other.effect 53 | return thisIsolated ? other.match.isNull && !otherIsolated 54 | : otherIsolated ? 
this.match.isNull 55 | : true 56 | } 57 | } 58 | 59 | class Rule { 60 | constructor(name, expr, params, context) { 61 | this.name = name 62 | this.expr = expr 63 | this.params = params 64 | this.context = context 65 | this.instances = [] 66 | this.recursive = null 67 | this.refcount = 0 68 | } 69 | 70 | getInstance(cx, args) { 71 | for (let i = 0; i < this.instances.length; i++) { 72 | let inst = this.instances[i] 73 | if (eqExprs(inst.args, args)) { 74 | if (this.recursive !== false) 75 | this.recursive = true 76 | return inst.graph 77 | } 78 | } 79 | let graph = cx.registerGraph(this.name, new SubGraph) 80 | this.instances.push({args, graph}) 81 | let result = cx.evalExpr(instantiateArgs(this.params, args, this.expr)) 82 | graph.nodes = result.nodes 83 | if (this.recursive === null) this.recursive = false 84 | return graph 85 | } 86 | } 87 | 88 | class SubGraph { 89 | constructor() { 90 | this.name = null 91 | this.nodes = [[]] 92 | } 93 | 94 | get edgeCount() { 95 | let count = 0 96 | for (let i = 0; i < this.nodes.length; i++) 97 | count += this.nodes[i].length 98 | return count 99 | } 100 | 101 | node() { 102 | return this.nodes.push([]) - 1 103 | } 104 | 105 | edge(from, to, match, effect) { 106 | this.nodes[from].push(new Edge(to, match, effect)) 107 | } 108 | 109 | copy(from, to, source, start = 0) { 110 | let mapping = [] 111 | mapping[start] = from 112 | let work = [start], workIndex = 0 113 | while (workIndex < work.length) { 114 | let cur = work[workIndex++], edges = source.nodes[cur] 115 | for (let i = 0; i < edges.length; i++) { 116 | let edge = edges[i] 117 | if (edge.to != null && work.indexOf(edge.to) == -1) { 118 | mapping[edge.to] = this.node() 119 | work.push(edge.to) 120 | } 121 | this.edge(mapping[cur], edge.to == null ? 
to : mapping[edge.to], edge.match, edge.effect) 122 | } 123 | } 124 | } 125 | 126 | join(mayHaveOutgoing) { 127 | let found = [] 128 | this.edges((e, n) => { if (e.to == null) found.push(n, e) }) 129 | if (found.length == 2) { 130 | let edge = found[1], node = this.nodes[found[0]] 131 | if (edge.match == nullMatch && !edge.effect && (mayHaveOutgoing || node.length == 1)) { 132 | node.splice(node.indexOf(edge), 1) 133 | return found[0] 134 | } 135 | } 136 | let add = this.node() 137 | for (let i = 1; i < found.length; i += 2) found[i].to = add 138 | return add 139 | } 140 | 141 | edges(f) { 142 | for (let i = 0; i < this.nodes.length; i++) { 143 | let edges = this.nodes[i] 144 | for (let j = 0; j < edges.length; j++) f(edges[j], i) 145 | } 146 | } 147 | 148 | countReferences(node) { 149 | let count = 0 150 | this.edges(e => { if (e.to == node) count++ }) 151 | return count 152 | } 153 | 154 | toString() { 155 | let output = "" 156 | this.edges((e, n) => output += " " + e.toString(this.name, n) + ";\n") 157 | return output 158 | } 159 | 160 | singleEdgeFrom(node) { 161 | let edges = this.nodes[node] 162 | return edges.length == 1 ? edges[0] : null 163 | } 164 | 165 | singleEdgeTo(node) { 166 | let found = null 167 | this.edges(e => { 168 | if (e.to == node) found = found == null ? e : false 169 | }) 170 | return found === false ? 
null : found 171 | } 172 | 173 | get simple() { 174 | if (this.nodes.length != 1) return null 175 | let node = this.nodes[0] 176 | if (node.length != 1 || node[0].effect) return null 177 | return node[0].match 178 | } 179 | 180 | static simple(match, effect) { 181 | let graph = new SubGraph 182 | graph.edge(0, null, match, effect) 183 | return graph 184 | } 185 | } 186 | 187 | SubGraph.any = SubGraph.simple(anyMatch) 188 | SubGraph.dot = SubGraph.simple(dotMatch) 189 | 190 | const MAX_INLINE_BLOWUP = 20 191 | 192 | class Context { 193 | constructor(rules, graphs) { 194 | this.rules = rules 195 | this.graphs = graphs 196 | } 197 | 198 | registerGraph(name, graph) { 199 | for (let i = 0;; i++) { 200 | let cur = name + (i ? "_" + i : "") 201 | if (!(cur in this.graphs)) { 202 | graph.name = cur 203 | return this.graphs[cur] = graph 204 | } 205 | } 206 | } 207 | 208 | evalExpr(expr) { 209 | let t = expr.type 210 | if (t == "CharacterRange") { 211 | return SubGraph.simple(new RangeMatch(expr.from, expr.to)) 212 | } else if (t == "StringMatch") { 213 | return SubGraph.simple(new StringMatch(expr.value)) 214 | } else if (t == "AnyMatch") { 215 | return SubGraph.any 216 | } else if (t == "DotMatch") { 217 | return SubGraph.dot 218 | } else if (t == "RuleIdentifier") { 219 | return this.evalCall(expr.id.name, expr.arguments) 220 | } else if (t == "RepeatedMatch") { 221 | return this.evalRepeat(expr.expr, expr.kind) 222 | } else if (t == "SequenceMatch") { 223 | return this.evalSequence(expr.exprs) 224 | } else if (t == "ChoiceMatch") { 225 | return this.evalChoice(expr.exprs) 226 | } else if (t == "LookaheadMatch") { 227 | let inner = this.evalExpr(expr.expr), simple = inner.simple, match 228 | if (simple) { 229 | match = new LookaheadMatch(null, simple, expr.kind == "~") 230 | } else { 231 | this.registerGraph("_lookahead", inner) 232 | match = new LookaheadMatch(inner, null, expr.kind == "~") 233 | } 234 | return SubGraph.simple(match) 235 | } else if (t == 
"PredicateMatch") { 236 | return SubGraph.simple(new PredicateMatch(expr.id.name)) 237 | } else { 238 | throw new Error("Unrecognized AST node type " + t) 239 | } 240 | } 241 | 242 | evalCall(name, args) { 243 | let rule = this.rules[name] 244 | if (args.length != rule.params.length) throw new Error("Wrong number of arguments for " + name) 245 | let graph = rule.getInstance(this, args), simple = graph.simple 246 | if (simple) 247 | return SubGraph.simple(simple, rule.context && rule.context.token ? new Token(rule.context.token) : null) 248 | else if (!rule.recursive && !rule.context && (rule.refcount == 1 || rule.refcount * graph.edgeCount <= MAX_INLINE_BLOWUP)) 249 | return graph 250 | else 251 | return SubGraph.simple(nullMatch, new Call(graph, rule.context)) 252 | } 253 | 254 | // FIXME there's still a bug here that showed up when doing listOf(x? y) 255 | evalRepeat(expr, kind) { 256 | let inner = this.evalExpr(expr), simple 257 | if ((simple = inner.simple) && !simple.isolated) 258 | return SubGraph.simple(new RepeatMatch(simple, kind)) 259 | let graph = new SubGraph 260 | if (kind == "*") { 261 | graph.copy(0, 0, inner) 262 | graph.edge(0, null, nullMatch) 263 | } else if (kind == "+") { 264 | let next = graph.node() 265 | graph.copy(0, next, inner) 266 | graph.edge(next, 0, nullMatch) 267 | graph.edge(next, null, nullMatch) 268 | } else if (kind == "?") { 269 | graph.copy(0, null, inner) 270 | graph.edge(0, null, nullMatch) 271 | } 272 | return graph 273 | } 274 | 275 | evalSequence(exprs) { 276 | let graph = new SubGraph, edge = graph.edge(0, null, nullMatch) 277 | for (let i = 0; i < exprs.length; i++) { 278 | let next = this.evalExpr(exprs[i]) 279 | let firstEdge, copyFrom = 0 280 | if (edge && (firstEdge = next.singleEdgeFrom(0)) && !firstEdge.effect && edge.canCombine(firstEdge)) { 281 | edge.match = SeqMatch.create(edge.match, firstEdge.match) 282 | copyFrom = firstEdge.to 283 | } 284 | if (copyFrom != null) { 285 | let hasIncoming = 
next.countReferences(copyFrom) == (copyFrom == 0 ? 0 : 1) 286 | graph.copy(graph.join(hasIncoming), null, next, copyFrom) 287 | if (i < exprs.length - 1) 288 | edge = graph.singleEdgeTo(null) 289 | } 290 | } 291 | return graph 292 | } 293 | 294 | evalChoice(exprs) { 295 | let graph = new SubGraph 296 | for (let i = 0, last = exprs.length - 1, next = null; i <= last; i++) { 297 | let curGraph = next || this.evalExpr(exprs[i]), simple = curGraph.simple 298 | next = null 299 | if (simple) { 300 | while (i < last) { 301 | let nextExpr = this.evalExpr(exprs[i + 1]), nextSimple = nextExpr.simple 302 | if (nextSimple) { 303 | simple = ChoiceMatch.create(simple, nextSimple) 304 | i++ 305 | } else { 306 | next = nextExpr 307 | break 308 | } 309 | } 310 | graph.edge(0, null, simple) 311 | } else { 312 | let start = 0 313 | if (curGraph.countReferences(0) > 0) 314 | graph.edge(0, start = graph.node(), nullMatch) 315 | graph.copy(start, null, curGraph) 316 | } 317 | } 318 | return graph 319 | } 320 | } 321 | 322 | function gatherRules(grammar) { 323 | let info = {rules: Object.create(null), start: null, tokens: []} 324 | function gather(grammar, prefix) { 325 | let explicitStart = null 326 | for (let name in grammar.rules) { 327 | let ast = grammar.rules[name] 328 | if (ast.start) { 329 | if (explicitStart) throw new Error("Multiple start rules") 330 | explicitStart = name 331 | } 332 | let ruleName = prefix + name 333 | if (info.rules[ruleName]) continue 334 | let expr = normalizeExpr(ast.expr, name, grammar.super, ast.skip, prefix) 335 | info.rules[ruleName] = new Rule(ruleName, expr, ast.params.map(n => prefix + n.name), 336 | !ast.context && !ast.tokenType ? null : ast.tokenType ? 
{name: ruleName, token: ast.tokenType} 337 | : {name: ruleName}) 338 | } 339 | if (grammar.super) gather(grammar.super, prefix) 340 | for (let i = 0; i < grammar.included.length; i++) { 341 | let inc = grammar.included[i] 342 | gather(inc.ast, prefix + inc.id.name + ".") 343 | } 344 | if (explicitStart) info.start = explicitStart 345 | for (let name in grammar.rules) { 346 | if (info.start == null) info.start = name 347 | if (grammar.rules[name].isToken && info.tokens.indexOf(name) == -1) info.tokens.push(name) 348 | } 349 | } 350 | gather(grammar, "") 351 | return info 352 | } 353 | 354 | function countReferences(rules, start, tokens) { 355 | function count(name, weight) { 356 | let rule = rules[name] 357 | if (!rule) throw new Error("Undefined rule " + name) 358 | rule.refcount += weight 359 | } 360 | count(start, 1) 361 | for (let i = 0; i < tokens.length; i++) count(tokens[i], 1) 362 | 363 | function countExpr(weight, params) { 364 | return expr => { 365 | if (expr.type == "RuleIdentifier") { 366 | if (params.indexOf(expr.id.name) == -1) 367 | count(expr.id.name, weight) 368 | for (let i = 0; i < expr.arguments.length; i++) 369 | forEachExpr(expr.arguments[i], countExpr(2, params)) 370 | return false 371 | } 372 | } 373 | } 374 | 375 | for (let name in rules) { 376 | let rule = rules[name] 377 | forEachExpr(rule.expr, countExpr(rule.params.length ? 
2 : 1, rule.params)) 378 | } 379 | } 380 | 381 | function gcGraphs(graphs, startNames) { 382 | let work = startNames.slice(), workIndex = 0 383 | function add(name) { 384 | if (work.indexOf(name) < 0) work.push(name) 385 | } 386 | 387 | while (workIndex < work.length) { 388 | graphs[work[workIndex++]].edges(edge => { 389 | if (edge.effect instanceof Call) add(edge.effect.target.name) 390 | edge.match.forEach(m => { 391 | if (m instanceof LookaheadMatch && m.start) add(m.start.name) 392 | }) 393 | }) 394 | } 395 | 396 | let result = Object.create(null) 397 | work.forEach(name => result[name] = graphs[name]) 398 | return result 399 | } 400 | -------------------------------------------------------------------------------- /src/matchexpr.js: -------------------------------------------------------------------------------- 1 | function escRe(str) { 2 | return str.replace(/[^\w ¡-￿]/g, ch => { 3 | if (ch == "\n") return "\\n" 4 | if (ch == "\t") return "\\t" 5 | if (ch == "\r") return "\\r" 6 | return "\\" + ch 7 | }) 8 | } 9 | 10 | function toSubRegexp(expr, wrapExpr) { 11 | if (expr.regexpPrec < wrapExpr.regexpPrec) return `(?:${expr.toRegexp()})` 12 | else return expr.toRegexp() 13 | } 14 | 15 | const OP_SEQ = 0, OP_CHOICE = 1, 16 | OP_STAR = 2, OP_PLUS = 3, OP_MAYBE = 4, 17 | OP_LOOKAHEAD = 5, OP_NEG_LOOKAHEAD = 6, 18 | OP_PREDICATE = 7 19 | 20 | class MatchExpr { 21 | constructor() {} 22 | 23 | get isNull() { return false } 24 | get simple() { return true } 25 | get isolated() { return false } 26 | 27 | get regexpPrec() { return 4 } 28 | 29 | toExpr() { 30 | return `/^${toSubRegexp(this, SeqMatch.prototype)}/` 31 | } 32 | 33 | forEach(f) { f(this) } 34 | } 35 | 36 | class StringMatch extends MatchExpr { 37 | constructor(string) { 38 | super() 39 | this.string = string 40 | } 41 | 42 | get simple() { return this.string != "\n" } 43 | get isolated() { return !this.simple } 44 | 45 | eq(other) { return other instanceof StringMatch && other.string == this.string } 46 | 
47 | toRegexp() { return escRe(this.string) } 48 | 49 | get regexpPrec() { return this.string.length == 1 ? super.regexpPrec : 2 } 50 | 51 | toExpr() { return JSON.stringify(this.string) } 52 | } 53 | exports.StringMatch = StringMatch 54 | 55 | class RangeMatch extends MatchExpr { 56 | constructor(from, to) { 57 | super() 58 | this.from = from 59 | this.to = to 60 | } 61 | 62 | get simple() { return this.from > "\n" || this.to < "\n" } 63 | get isolated() { return !this.simple } 64 | 65 | eq(other) { return other instanceof RangeMatch && other.from == this.from && other.to == this.to } 66 | 67 | toRegexp() { return "[" + escRe(this.from) + "-" + escRe(this.to) + "]" } 68 | } 69 | exports.RangeMatch = RangeMatch 70 | 71 | const anyMatch = exports.anyMatch = new class AnyMatch extends MatchExpr { 72 | get simple() { return false } 73 | get isolated() { return true } 74 | eq(other) { return other == anyMatch } 75 | toRegexp() { return "[^]" } 76 | } 77 | 78 | const dotMatch = exports.dotMatch = new class DotMatch extends MatchExpr { 79 | eq(other) { return other == dotMatch } 80 | toRegexp() { return "." 
} 81 | } 82 | 83 | const nullMatch = exports.nullMatch = new class NullMatch extends MatchExpr { 84 | get isNull() { return true } 85 | eq(other) { return other == anyMatch } 86 | toRegexp() { return "" } 87 | toExpr() { return "null" } 88 | } 89 | 90 | class SeqMatch extends MatchExpr { 91 | constructor(matches) { 92 | super() 93 | this.matches = matches 94 | } 95 | 96 | eq(other) { return other instanceof SeqMatch && eqArray(other.matches, this.matches) } 97 | 98 | get simple() { 99 | return this.matches.every(m => m.simple) 100 | } 101 | get isolated() { 102 | return this.matches.some(m => m.isolated) 103 | } 104 | 105 | get regexpPrec() { return 2 } 106 | 107 | toRegexp() { return this.matches.map(m => toSubRegexp(m, this)).join("") } 108 | 109 | toExpr(getName) { 110 | if (this.simple) return super.toExpr() 111 | return `[${OP_SEQ}, ${this.matches.map(m => m.toExpr(getName)).join(", ")}]` 112 | } 113 | 114 | forEach(f) { f(this); this.matches.forEach(m => m.forEach(f)) } 115 | 116 | static create(left, right) { 117 | if (left == nullMatch) return right 118 | if (right == nullMatch) return left 119 | 120 | let before = left instanceof SeqMatch ? left.matches : [left] 121 | let after = right instanceof SeqMatch ? 
right.matches : [right] 122 | let last = before[before.length - 1], first = after[0] 123 | 124 | if (last instanceof StringMatch && first instanceof StringMatch) { 125 | after[0] = new StringMatch(last.string + right.string) 126 | before.pop() 127 | } else if (first instanceof RepeatMatch && first.type == "*") { 128 | if (last.eq(first.match)) { 129 | after[0] = new RepeatMatch(last, "+") 130 | before.pop() 131 | } else if (first.match instanceof StringMatch && last instanceof StringMatch && 132 | new RegExp(first.match.toRegexp() + "$").test(last.string)) { 133 | after[0] = new RepeatMatch(first.match, "+") 134 | before[before.length - 1] = new StringMatch(last.string.slice(0, last.string.length - first.match.string.length)) 135 | } 136 | } 137 | let matches = before.concat(after) 138 | return matches.length == 1 ? matches[0] : new SeqMatch(matches) 139 | } 140 | } 141 | exports.SeqMatch = SeqMatch 142 | 143 | class ChoiceMatch extends MatchExpr { 144 | constructor(matches) { 145 | super() 146 | this.matches = matches 147 | } 148 | 149 | get simple() { return this.matches.every(m => m.simple) } 150 | 151 | get isolated() { return this.matches.some(m => m.isolated) } 152 | 153 | eq(other) { return other instanceof ChoiceMatch && eqArray(other.matches, this.matches) } 154 | 155 | get regexpPrec() { return this.isSet() ? 4 : 1 } 156 | 157 | isSet() { 158 | return this.matches.every(m => m instanceof StringMatch && m.string.length == 1 || m instanceof RangeMatch) 159 | } 160 | 161 | // FIXME reduce to \d, \w when appropriate 162 | toRegexp() { 163 | if (this.isSet()) 164 | return `[${this.matches.map(m => m instanceof StringMatch ? 
escRe(m.string) : escRe(m.from) + "-" + escRe(m.to)).join("")}]` 165 | else 166 | return this.matches.map(m => toSubRegexp(m, this)).join("|") 167 | } 168 | 169 | toExpr(getName) { 170 | if (this.simple) return super.toExpr() 171 | return `[${OP_CHOICE}, ${this.matches.map(m => m.toExpr(getName)).join(", ")}]` 172 | } 173 | 174 | forEach(f) { f(this); this.matches.forEach(m => m.forEach(f)) } 175 | 176 | static create(left, right) { 177 | let matches = [] 178 | if (left instanceof ChoiceMatch) matches = matches.concat(left.matches) 179 | else matches.push(left) 180 | if (right instanceof ChoiceMatch) matches = matches.concat(right.matches) 181 | else matches.push(right) 182 | return new ChoiceMatch(matches) 183 | } 184 | } 185 | exports.ChoiceMatch = ChoiceMatch 186 | 187 | class RepeatMatch extends MatchExpr { 188 | constructor(match, type) { 189 | super() 190 | this.match = match 191 | this.type = type 192 | } 193 | 194 | get simple() { return this.match.simple } 195 | 196 | eq(other) { return other instanceof RepeatMatch && this.match.eq(other.match) && this.type == other.type } 197 | 198 | get regexpPrec() { return 3 } 199 | 200 | toRegexp() { 201 | return toSubRegexp(this.match, this) + this.type 202 | } 203 | 204 | toExpr(getName) { 205 | if (this.simple) return super.toExpr() 206 | return `[${this.type == "*" ? OP_STAR : this.type == "+" ? OP_PLUS : OP_MAYBE}, ${this.match.toExpr(getName)}]` 207 | } 208 | 209 | forEach(f) { f(this); this.match.forEach(f) } 210 | } 211 | exports.RepeatMatch = RepeatMatch 212 | 213 | class LookaheadMatch extends MatchExpr { 214 | constructor(start, expr, positive) { 215 | super() 216 | this.start = start 217 | this.expr = expr 218 | this.positive = positive 219 | } 220 | 221 | get isNull() { return true } 222 | 223 | get simple() { return !!this.expr } 224 | 225 | eq(other) { 226 | return other instanceof LookaheadMatch && other.start == this.start && 227 | (this.expr ? 
other.expr && this.expr.eq(other.expr) : !other.expr) && 228 | other.positive == this.positive 229 | } 230 | 231 | toRegexp() { 232 | if (this.expr) 233 | return `(?${this.positive ? "=" : "!"}${this.expr.toRegexp()})` 234 | else // Not actually a regexp, but used for graph output 235 | return "LOOKAHEAD(" + this.start + ")" 236 | } 237 | 238 | toExpr(getName) { 239 | if (this.expr) return super.toExpr() 240 | return `[${this.positive ? OP_LOOKAHEAD : OP_NEG_LOOKAHEAD}, ${getName(this.start.name)}]` 241 | } 242 | 243 | forEach(f) { f(this); if (this.expr) this.expr.forEach(f) } 244 | } 245 | exports.LookaheadMatch = LookaheadMatch 246 | 247 | class PredicateMatch extends MatchExpr { 248 | constructor(name) { 249 | super() 250 | this.name = name 251 | } 252 | 253 | get isNull() { return true } 254 | 255 | get simple() { return false } 256 | 257 | eq(other) { return other instanceof PredicateMatch && other.name == this.name } 258 | 259 | toRegexp() { return "PRED(" + this.name + ")" } 260 | 261 | toExpr() { 262 | return `[${OP_PREDICATE}, ${JSON.stringify(this.name)}]` 263 | } 264 | } 265 | exports.PredicateMatch = PredicateMatch 266 | 267 | let eqArray = exports.eqArray = function(a, b) { 268 | if (a.length != b.length) return false 269 | for (let i = 0; i < a.length; i++) if (!a[i].eq(b[i])) return false 270 | return true 271 | } 272 | -------------------------------------------------------------------------------- /src/mode.js: -------------------------------------------------------------------------------- 1 | var verbose = 0 2 | 3 | function Context(name, tokenType, depth, parent, line, pos) { 4 | this.name = name 5 | this.tokenType = tokenType 6 | this.depth = depth 7 | this.parent = parent 8 | this.startLine = line 9 | this.startPos = pos 10 | } 11 | 12 | var MAX_LOOKAHEAD_LINES = 3 13 | 14 | function MatchContext() { 15 | this.stream = null 16 | this.line = this.startPos = 0 17 | this.string = this.startLine = "" 18 | this.copyInstance = null 19 | } 20 | 21 | 
MatchContext.prototype.start = function(stream) { 22 | this.stream = stream 23 | this.line = 0 24 | this.string = stream.string.slice(stream.start) 25 | this.startLine = stream.string 26 | this.startPos = stream.start 27 | return this 28 | } 29 | 30 | MatchContext.prototype.startLinebreak = function() { 31 | this.stream = null 32 | this.line = this.startPos = 0 33 | this.string = "\n" 34 | this.startLine = "" 35 | return this 36 | } 37 | 38 | MatchContext.prototype.copy = function() { 39 | var copy = this.copyInstance || (this.copyInstance = new MatchContext) 40 | copy.stream = this.stream 41 | copy.startPos = this.startPos 42 | copy.line = this.line 43 | copy.startLine = this.startLine 44 | copy.string = this.string 45 | return copy 46 | } 47 | 48 | MatchContext.prototype.updateStart = function() { 49 | this.startLine = !this.stream ? "" : this.line == 0 ? this.stream.string : this.stream.lookAhead(this.line) 50 | this.startPos = this.startLine.length - (this.string.length - 1) 51 | } 52 | 53 | MatchContext.prototype.ahead = function(n) { 54 | for (;;) { 55 | if (n <= this.string.length) return true 56 | if (this.string.charCodeAt(this.string.length - 1) !== 10) { 57 | this.string += "\n" 58 | } else if (this.line === MAX_LOOKAHEAD_LINES || !this.stream || !this.stream.lookAhead) { 59 | return false 60 | } else { 61 | var next = this.stream.lookAhead(this.line + 1) 62 | if (next == null) return false 63 | this.string += next + "\n" 64 | this.line++ 65 | } 66 | } 67 | } 68 | 69 | var tokenValue = null 70 | 71 | var stateClass = function(graph, options) { 72 | function StateClass(stack, context) { 73 | this.stack = stack 74 | this.context = context 75 | } 76 | 77 | StateClass.prototype.matchNext = function(mcx, pos, maxSkip, top) { 78 | var depth = this.stack.length - 1, node = this.stack[depth], edges = graph.nodes[node] 79 | 80 | for (var i = 0; i < edges.length; i++) { 81 | var op = edges[i], matched, to // See compileEdge in compile.js 82 | if (op === 0) { // 
Null match 83 | matched = pos 84 | to = edges[++i] 85 | } else if (op === 1 || op === 2) { // 1, callTarget, returnTo 86 | var target = edges[++i] // 2, callTarget, returnTo, context 87 | var returnTo = edges[++i] 88 | this.go(returnTo) 89 | var oldContext = this.context 90 | if (op === 2) { 91 | var cx = edges[++i] 92 | this.context = new Context(cx.name, cx.token, this.stack.length, this.context, mcx.startLine, mcx.startPos) 93 | } 94 | this.stack.push(target) 95 | var inner = this.matchNext(mcx, pos, 0, false) 96 | if (inner === pos) inner = this.matchNext(mcx, pos, i == edges.length - 1 ? maxSkip : 0, top) 97 | if (inner < 0) { // Reset state when the call fails 98 | this.stack.length = depth + 1 99 | this.stack[depth] = node 100 | this.context = oldContext 101 | continue 102 | } 103 | return inner 104 | } else if (op === 3) { // 3, tokenType, matchExpr, nextNode 105 | var token = edges[++i] 106 | matched = this.matchExpr(edges[++i], mcx, pos) 107 | to = edges[++i] 108 | if (matched > pos) tokenValue = token 109 | } else { // matchExpr, nextNode 110 | matched = this.matchExpr(op, mcx, pos) 111 | to = edges[++i] 112 | } 113 | 114 | if (matched < 0) { 115 | if (maxSkip > 0 && i == edges.length - 1) { 116 | if (verbose > 0) console["log"]("Dead end at", mcx.string.slice(pos), node, this.stack.join()) 117 | maxSkip-- 118 | matched = pos 119 | } else { 120 | continue 121 | } 122 | } 123 | this.go(to) 124 | if (!top && to === -1 || this.stack.length === 0) return matched 125 | 126 | if (matched > pos) { 127 | if (verbose > 1) 128 | console["log"]("Token", JSON.stringify(mcx.string.slice(pos, matched)), "from", node, "to", to, "under", this.stack.join()) 129 | return matched 130 | } else { 131 | matched = this.matchNext(mcx, pos, i == edges.length - 1 ? 
maxSkip : 0, top) 132 | if (matched >= 0) return matched 133 | this.stack.length = depth + 1 134 | this.stack[depth] = node 135 | } 136 | } 137 | return -1 138 | } 139 | 140 | StateClass.prototype.go = function(to) { 141 | this.stack.pop() 142 | while (this.context && this.context.depth > this.stack.length) 143 | this.context = this.context.parent 144 | if (to !== -1) this.stack.push(to) 145 | } 146 | 147 | StateClass.prototype.runMaybe = function(mcx, pos, maxSkip) { 148 | tokenValue = null 149 | return this.matchNext(mcx, pos, maxSkip, true) 150 | } 151 | 152 | StateClass.prototype.forward = function(mcx, pos) { 153 | var progress = this.runMaybe(mcx, pos, 2) 154 | if (progress < 0) { 155 | if (verbose > 0) console["log"]("Lost it at", mcx.string.slice(pos), this.stack.join()) 156 | this.stack.push(graph.token) 157 | progress = this.runMaybe(mcx, pos, 0) 158 | } 159 | return progress 160 | } 161 | 162 | StateClass.prototype.lookahead = function(mcx, pos, start) { 163 | var oldTokenValue = tokenValue 164 | var state = new this.constructor([start], null) 165 | mcx = mcx.copy() 166 | for (;;) { 167 | mcx.updateStart() 168 | // FIXME implement custom scanning algorithm. This one breaks when a sub-match fails 169 | var newPos = state.runMaybe(mcx, pos, 0) 170 | if (newPos < 0) { tokenValue = oldTokenValue; return false } 171 | if (state.stack.length === 0) { tokenValue = oldTokenValue; return true } 172 | pos = newPos 173 | } 174 | } 175 | 176 | StateClass.prototype.matchExpr = function(expr, mcx, pos) { 177 | if (typeof expr === "string") { 178 | var end = pos + expr.length 179 | return mcx.ahead(end) && mcx.string.slice(pos, end) === expr ? end : -1 180 | } 181 | if (expr.exec) { 182 | var m = mcx.ahead(pos + 1) && expr.exec(pos > 0 ? 
mcx.string.slice(pos) : mcx.string) 183 | if (!m) return -1 184 | return pos + m[0].length 185 | } 186 | 187 | var op = expr[0] 188 | if (op === 0) { // OP_SEQ, ...rest 189 | for (var i = 1; i < expr.length; i++) { 190 | pos = this.matchExpr(expr[i], mcx, pos) 191 | if (pos < 0) return -1 192 | } 193 | return pos 194 | } else if (op === 1) { // OP_CHOICE, ...rest 195 | for (var i = 1, e = expr.length - 1;; i++) { 196 | var cur = this.matchExpr(expr[i], mcx, pos) 197 | if (i === e || cur > -1) return cur 198 | } 199 | return -1 200 | } else if (op === 2 || op === 3) { // OP_STAR/OP_PLUS, expr 201 | if (op === 3 && (pos = this.matchExpr(expr[1], mcx, pos)) < 0) return -1 202 | for (;;) { 203 | var inner = this.matchExpr(expr[1], mcx, pos) 204 | if (inner == -1) return pos 205 | pos = inner 206 | } 207 | } else if (op === 4) { // OP_MAYBE, expr 208 | return Math.max(this.matchExpr(expr[1], mcx, pos), pos) 209 | } else if (op === 5) { // OP_LOOKAHEAD, expr 210 | return this.lookahead(mcx, pos, expr[1]) ? pos : -1 211 | } else if (op === 6) { // OP_NEG_LOOKAHEAD, expr 212 | return this.lookahead(mcx, pos, expr[1]) ? -1 : pos 213 | } else if (op === 7) { // OP_PREDICATE, name 214 | var lineStart = pos ? mcx.string.lastIndexOf("\n", pos - 1) : -1, line, linePos 215 | if (mcx.stream && lineStart < 0) { 216 | line = mcx.stream.string 217 | linePos = pos + mcx.stream.start 218 | } else { 219 | var lineEnd = mcx.string.indexOf("\n", pos) 220 | line = mcx.string.slice(lineStart + 1, lineEnd < 0 ? mcx.string.length : lineEnd) 221 | linePos = pos - (lineStart + 1) 222 | } 223 | return options.predicates[expr[1]](line, linePos, this.context, mcx.stream ? nextLines(mcx.stream) : noNextLines) ? 
pos : -1 224 | } else { 225 | throw new Error("Unknown match type " + expr) 226 | } 227 | } 228 | 229 | function noNextLines() { return null } 230 | 231 | function nextLines(stream) { return function(n) { return stream.lookAhead(n) } } 232 | 233 | StateClass.prototype.contextAt = function(line, linePos) { 234 | var copy = this.copy(), mcx = new MatchContext, pos = 0, lastCx = this.context 235 | mcx.string = line + "\n" 236 | mcx.startLine = line 237 | for (;;) { 238 | var matched = copy.runMaybe(mcx, pos, 0) 239 | if (matched == -1) return copy.context 240 | if (matched > linePos) { 241 | var context = copy.context 242 | if (pos == linePos) { 243 | trim: while (context) { 244 | for (var prev = lastCx; prev; prev = prev.parent) if (prev === context) break trim 245 | context = context.parent 246 | } 247 | } 248 | return context 249 | } 250 | pos = matched 251 | lastCx = copy.context 252 | } 253 | } 254 | 255 | StateClass.prototype.copy = function() { 256 | return new this.constructor(this.stack.slice(), this.context) 257 | } 258 | 259 | StateClass.start = function() { 260 | return new this([graph.start], null) 261 | } 262 | 263 | return StateClass 264 | } 265 | 266 | // declare global: CodeMirror 267 | function GrammarMode(graph, options) { 268 | this.State = stateClass(graph, options || {}) 269 | this.mcx = new MatchContext 270 | } 271 | CodeMirror.GrammarMode = GrammarMode 272 | 273 | GrammarMode.prototype.startState = function() { return this.State.start() } 274 | 275 | GrammarMode.prototype.copyState = function(state) { return state.copy() } 276 | 277 | GrammarMode.prototype.token = function(stream, state) { 278 | stream.pos += state.forward(this.mcx.start(stream), 0) 279 | var tokenType = tokenValue 280 | for (var cx = state.context; cx; cx = cx.parent) 281 | if (cx.tokenType) tokenType = cx.tokenType + (tokenType ? 
" " + tokenType : "") 282 | if (stream.eol()) 283 | state.forward(this.mcx, stream.pos - stream.start) 284 | return tokenType 285 | } 286 | 287 | GrammarMode.prototype.blankLine = function(state) { 288 | state.forward(this.mcx.startLinebreak(), 0) 289 | } 290 | -------------------------------------------------------------------------------- /src/parse.js: -------------------------------------------------------------------------------- 1 | module.exports = function(file, fileName) { 2 | return parseGrammar(new Input(file, fileName), 0) 3 | } 4 | 5 | class Node { 6 | constructor(type, start, props, end) { 7 | this.type = type 8 | this.start = start 9 | this.end = end 10 | if (props) for (let prop in props) this[prop] = props[prop] 11 | } 12 | } 13 | 14 | const wordChar = /[\w_$]/ 15 | 16 | class Input { 17 | constructor(string, fileName) { 18 | this.string = string 19 | this.fileName = fileName 20 | this.type = "sof" 21 | this.value = null 22 | this.start = this.end = this.lastEnd = 0 23 | this.next() 24 | } 25 | 26 | lineInfo(pos) { 27 | for (let line = 1, cur = 0;;) { 28 | let next = this.string.indexOf("\n", cur) 29 | if (next > -1 && next < pos) { 30 | ++line 31 | cur = next + 1 32 | } else { 33 | return {line, ch: pos - cur, fileName: this.fileName} 34 | } 35 | } 36 | } 37 | 38 | raise(msg, pos) { 39 | let info = this.lineInfo(pos) 40 | throw new SyntaxError(`${msg} (${info.fileName ? info.fileName + " " : ""}${info.line}:${info.ch})`) 41 | } 42 | 43 | match(pos, re) { 44 | let match = re.exec(this.string.slice(pos)) 45 | return match ? 
pos + match[0].length : -1 46 | } 47 | 48 | next() { 49 | this.lastEnd = this.end 50 | let start = this.match(this.end, /^(\s|\/\/.*|\/\*[^]*?\*\/)*/) 51 | if (start == this.string.length) return this.set("eof", null, start, start) 52 | 53 | let next = this.string[start] 54 | if (next == '"') { 55 | let end = this.match(start + 1, /^(\\.|[^"])*"/) 56 | if (end == -1) this.raise("Unterminated string literal", start) 57 | return this.set("string", JSON.parse(this.string.slice(start, end)), start, end) 58 | } else if (/[()|&~!\-+*?{}\.,=]/.test(next)) { 59 | return this.set(next, null, start, start + 1) 60 | } else if (wordChar.test(next)) { 61 | let end = start + 1 62 | while (end < this.string.length && wordChar.test(this.string[end])) end++ 63 | return this.set("id", this.string.slice(start, end), start, end) 64 | } else { 65 | this.raise("Unexpected character " + JSON.stringify(next), start) 66 | } 67 | } 68 | 69 | set(type, value, start, end) { 70 | this.type = type 71 | this.value = value 72 | this.start = start 73 | this.end = end 74 | } 75 | 76 | startNode(type, props) { 77 | return new Node(type, this.start, props) 78 | } 79 | 80 | finishNode(node, type) { 81 | if (type != null) node.type = type 82 | node.end = this.lastEnd 83 | return node 84 | } 85 | 86 | eat(type, value) { 87 | if (this.type == type && (value == null || this.value === value)) { 88 | this.next() 89 | return true 90 | } else { 91 | return false 92 | } 93 | } 94 | 95 | unexpected() { 96 | this.raise(`Unexpected token '${this.string.slice(this.start, this.end)}'`, this.start) 97 | } 98 | } 99 | 100 | function parseGrammar(input) { 101 | let node = input.startNode("GrammarDeclaration", { 102 | rules: Object.create(null), 103 | extends: null, 104 | included: [] 105 | }) 106 | 107 | for (;;) { 108 | let start = input.start 109 | if (input.eat("id", "extends")) { 110 | if (node.extends) input.raise("Can't extend multiple grammars", start) 111 | if (input.type != "string") input.unexpected() 112 | 
node.extends = input.value 113 | input.next() 114 | } else if (input.eat("id", "include")) { 115 | let inclNode = new Node("IncludeDeclaration", start) 116 | if (input.type != "string") input.unexpected() 117 | inclNode.value = input.value 118 | input.next() 119 | if (!input.eat("id", "as")) input.unexpected() 120 | inclNode.id = parseIdent(input) 121 | node.included.push(input.finishNode(inclNode)) 122 | } else { 123 | break 124 | } 125 | } 126 | 127 | while (input.type != "eof") { 128 | if (input.eat("id", "skip")) { 129 | let skipExpr = parseExprChoice(input) 130 | if (!input.eat("{")) input.unexpected() 131 | while (!input.eat("}")) 132 | parseRule(input, node.rules, false, skipExpr) 133 | } else if (input.eat("id", "tokens")) { 134 | if (!input.eat("{")) input.unexpected() 135 | while (!input.eat("}")) 136 | parseRule(input, node.rules, true, null) 137 | } else { 138 | parseRule(input, node.rules, false, null) 139 | } 140 | } 141 | return input.finishNode(node) 142 | } 143 | 144 | function parseRule(input, rules, isToken, skip) { 145 | let node = input.startNode("RuleDeclaration", { 146 | isToken, 147 | // FIXME Storing the same sub-ast in multiple nodes is a rather 148 | // weird way to build an AST 149 | skip, 150 | context: input.eat("id", "context"), 151 | start: input.eat("id", "start"), 152 | id: parseIdent(input), 153 | tokenType: null, 154 | params: [] 155 | }) 156 | if (node.id.name in rules) 157 | input.raise(`Duplicate rule declaration '${node.id.name}'`, node.id.start) 158 | rules[node.id.name] = node 159 | 160 | if (input.eat("(")) while (!input.eat(")")) { 161 | if (node.params.length && !input.eat(",")) input.unexpected() 162 | node.params.push(parseIdent(input)) 163 | } 164 | if (isToken && node.params.length > 0) 165 | input.raise("Token rules must not take parameters", node.params[0].start) 166 | if (input.eat("=")) { 167 | if (input.type != "string") input.unexpected() 168 | node.tokenType = input.value 169 | input.next() 170 | node.context 
= true 171 | } 172 | if (!input.eat("{")) input.unexpected() 173 | node.expr = parseExprChoice(input) 174 | if (!input.eat("}")) input.unexpected() 175 | return input.finishNode(node) 176 | } 177 | 178 | function parseExprInner(input) { 179 | if (input.eat("(")) { 180 | let expr = parseExprChoice(input) 181 | if (!input.eat(")")) input.unexpected() 182 | return expr 183 | } 184 | 185 | let node = input.startNode() 186 | if (input.type == "string") { 187 | let value = input.value 188 | input.next() 189 | if (value.length == 1 && input.eat("-")) { 190 | if (input.type != "string" || input.value.length != 1) input.unexpected() 191 | node.from = value 192 | node.to = input.value 193 | input.next() 194 | return input.finishNode(node, "CharacterRange") 195 | } else { 196 | if (value.length == 0) input.raise("Empty strings are not valid in grammars", node.start) 197 | node.value = value 198 | return input.finishNode(node, "StringMatch") 199 | } 200 | } else if (input.eat("id", "super")) { 201 | return input.finishNode(node, "SuperMatch") 202 | } else if (input.eat("&")) { 203 | node.id = parseIdent(input) 204 | return input.finishNode(node, "PredicateMatch") 205 | } else if (input.eat("id", "_")) { 206 | return input.finishNode(node, "AnyMatch") 207 | } else if (input.eat(".")) { 208 | return input.finishNode(node, "DotMatch") 209 | } else { 210 | node.id = parseDottedIdent(input) 211 | node.arguments = [] 212 | if (input.start == node.id.end && input.eat("(")) while (!input.eat(")")) { 213 | if (node.arguments.length && !input.eat(",")) input.unexpected() 214 | node.arguments.push(parseExprChoice(input)) 215 | } 216 | return input.finishNode(node, "RuleIdentifier") 217 | } 218 | } 219 | 220 | function parseExprSuffix(input) { 221 | let start = input.start 222 | let expr = parseExprInner(input) 223 | if (input.type == "*" || input.type == "?" 
|| input.type == "+") { 224 | let node = new Node("RepeatedMatch", start, { 225 | expr, 226 | kind: input.type 227 | }, input.end) 228 | input.next() 229 | return node 230 | } 231 | return expr 232 | } 233 | 234 | function parseExprLookahead(input) { 235 | if (input.type == "!" || input.type == "~") { 236 | let node = input.startNode("LookaheadMatch", {kind: input.type}) 237 | input.next() 238 | node.expr = parseExprSuffix(input) 239 | return input.finishNode(node) 240 | } else { 241 | return parseExprSuffix(input) 242 | } 243 | } 244 | 245 | function endOfSequence(input) { 246 | return input.type == "}" || input.type == ")" || input.type == "|" || input.type == "{" || input.type == "," 247 | } 248 | 249 | function parseExprSequence(input) { 250 | let start = input.start, first = parseExprLookahead(input) 251 | if (endOfSequence(input)) return first 252 | let node = new Node("SequenceMatch", start, {exprs: [first]}) 253 | do { node.exprs.push(parseExprLookahead(input)) } 254 | while (!endOfSequence(input)) 255 | return input.finishNode(node) 256 | } 257 | 258 | function parseExprChoice(input) { 259 | let start = input.start, left = parseExprSequence(input) 260 | if (!input.eat("|")) return left 261 | let node = new Node("ChoiceMatch", start, {exprs: [left]}) 262 | do { node.exprs.push(parseExprSequence(input)) } 263 | while (input.eat("|")) 264 | return input.finishNode(node) 265 | } 266 | 267 | function parseIdent(input) { 268 | if (input.type != "id") input.unexpected() 269 | let node = input.startNode("Identifier", {name: input.value}) 270 | input.next() 271 | return input.finishNode(node) 272 | } 273 | 274 | function parseDottedIdent(input) { 275 | if (input.type != "id") input.unexpected() 276 | let node = input.startNode("Identifier", {name: input.value}) 277 | input.next() 278 | while (input.start == input.lastEnd && input.eat(".")) { 279 | if (input.type != "id") input.unexpected() 280 | node.name += "." 
+ input.value 281 | input.next() 282 | } 283 | return input.finishNode(node) 284 | } 285 | --------------------------------------------------------------------------------