├── .eslintrc.json ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .gitmodules ├── GNUmakefile ├── README.md ├── generator ├── .gitignore ├── dub.sdl ├── dub.selections.json └── source │ ├── ddoc.d │ ├── generator.d │ ├── grammar.d │ ├── parser.d │ └── writer.d ├── grammar.js ├── package-lock.json ├── package.json ├── src └── scanner.cc └── test ├── corpus ├── 2_lex-13_floatliteral.txt ├── 2_lex-16_special_token_sequence.txt ├── 2_lex-1_source_text.txt ├── 2_lex-6_comment.txt ├── 2_lex-9_string_literals.txt └── 30_iasm-11_gcc.txt ├── parse-success-xfail.txt ├── parse-success ├── dmd │ ├── compilable │ └── runnable └── dmd_asm.d ├── repos └── README.md └── tmp └── .gitignore /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": { 3 | "es2017": true 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | on: [ push, pull_request ] 3 | jobs: 4 | test: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v2 8 | with: 9 | submodules: true 10 | 11 | - uses: actions/setup-node@v2 12 | with: 13 | node-version: 14 14 | 15 | - run: make test 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # npm install 2 | /node_modules/ 3 | 4 | # tree-sitter generate 5 | !/src/ 6 | /src/* 7 | !/src/scanner.cc 8 | /bindings/ 9 | /Cargo.toml 10 | binding.gyp 11 | 12 | # tree-sitter build-wasm 13 | /tree-sitter-d.wasm 14 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "dlang.org"] 2 | path = generator/dlang.org 3 | url = 
https://github.com/CyberShadow/d-programming-language.org 4 | [submodule "test/repos/dmd"] 5 | path = test/repos/dmd 6 | url = https://github.com/dlang/dmd.git 7 | -------------------------------------------------------------------------------- /GNUmakefile: -------------------------------------------------------------------------------- 1 | # Configuration 2 | 3 | TREE_SITTER=node_modules/.bin/tree-sitter 4 | DOCKER_FLAG=--docker 5 | SO_SUFFIX=.so 6 | 7 | # Constants 8 | 9 | PARSER=src/parser.c 10 | XDG_CACHE_HOME=$(HOME)/.cache 11 | SO=$(XDG_CACHE_HOME)/tree-sitter/lib/d$(SO_SUFFIX) 12 | WASM=tree-sitter-d.wasm 13 | TEST_TS_FILES=$(shell find test/corpus -type f) 14 | TEST_TS_OK=test/tmp/tree-sitter-test.ok 15 | TEST_PARSE_SUCCESS_OK=test/tmp/parse-success.ok 16 | TEST_PARSE_SUCCESS_XFAIL_OK=$(addsuffix .ok,$(subst test/parse-success-xfail/,test/tmp/parse-success-xfail/,$(shell find test/parse-success-xfail -type f))) 17 | 18 | # Entry points 19 | 20 | all : compile 21 | parser : $(PARSER) 22 | compile : $(SO) 23 | wasm : $(WASM) 24 | 25 | test : test-ts test-parse-success 26 | test-ts : $(TEST_TS_OK) 27 | test-parse-success : $(TEST_PARSE_SUCCESS_OK) 28 | 29 | # Implementation 30 | 31 | .PHONY : all parser compile wasm test test-ts test-parse-success test-parse-success-xfail web-ui 32 | 33 | # The default is to use the tree-sitter version which would be 34 | # installed by npm (according to package.json / package-lock.json). 35 | # If it hasn't been installed yet, do so automatically. 36 | node_modules/.bin/tree-sitter : 37 | npm install 38 | 39 | # Build the grammar (grammar.json, parser.c etc.) 40 | $(PARSER) : grammar.js src/scanner.cc $(TREE_SITTER) 41 | $(TREE_SITTER) generate 42 | 43 | # Build a shared object binary from the parser 44 | # This file mainly exists to avoid race conditions / duplicate work 45 | # when running the test targets in parallel. 
46 | $(SO) : $(PARSER) 47 | @# No explicit "compile" command, so just parse an empty file 48 | $(TREE_SITTER) parse -q /dev/null 49 | 50 | # Build a WASM binary from the parser 51 | # The default is to use Docker, which will ensure that the correct version is used 52 | # (https://github.com/tree-sitter/tree-sitter/pull/1180). 53 | # Run with DOCKER_FLAG= to use the host Emscripten version. 54 | $(WASM) : $(PARSER) 55 | $(TREE_SITTER) build-wasm $(DOCKER_FLAG) 56 | 57 | # Launch web-ui 58 | web-ui : $(WASM) 59 | $(TREE_SITTER) web-ui 60 | 61 | # tree-sitter test suite 62 | $(TEST_TS_OK) : $(TEST_TS_FILES) $(SO) 63 | $(TREE_SITTER) test 64 | @touch $@ 65 | 66 | # parse-success 67 | 68 | PARSE_SUCCESS_RESULTS=test/tmp/parse-success-results.txt 69 | PARSE_SUCCESS_XFAIL_IN=test/parse-success-xfail.txt 70 | PARSE_SUCCESS_XFAIL=test/tmp/parse-success-xfail.txt 71 | 72 | $(PARSE_SUCCESS_RESULTS) : $(SO) 73 | rm -f $@ 74 | find -L test/parse-success -type f \( -name '*.d' -o -name '*.di' \) | sort | $(TREE_SITTER) parse -q --paths /dev/stdin | awk '{print $$1}' > $@ 75 | 76 | $(PARSE_SUCCESS_XFAIL) : $(PARSE_SUCCESS_XFAIL_IN) 77 | grep '^[^#]' $< | sort > $@ 78 | 79 | $(TEST_PARSE_SUCCESS_OK) : $(PARSE_SUCCESS_RESULTS) $(PARSE_SUCCESS_XFAIL) 80 | diff -u $+ 81 | @touch $@ 82 | 83 | # parse-success-xfail 84 | test/tmp/parse-success-xfail/%.ok : test/parse-success-xfail/% $(SO) 85 | if $(TREE_SITTER) parse -q $< ; then exit 1 ; fi 86 | @mkdir -p "$$(dirname $@)" 87 | @touch $@ 88 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | tree-sitter-d 2 | ============= 3 | 4 | This repository hosts a [tree-sitter](https://tree-sitter.github.io/) grammar for the [D programming language](https://dlang.org/). 5 | 6 | About 7 | ----- 8 | 9 | The process of generating the grammar consists of a number of steps. 
The following lists the full process that the grammar goes through. 10 | 11 | 1. The origin of the grammar described here is [the official specification of the D programming language](https://dlang.org/spec/spec.html). 12 | 13 | Though it can be perused online, we use the source code, which is written in [DDoc](https://dlang.org/spec/ddoc.html) (the D documentation macro processor) 14 | and is maintained in the [dlang/dlang.org GitHub repository](https://github.com/dlang/dlang.org/tree/master/spec). 15 | 16 | The `generator/dlang.org` submodule points to the copy that is used by this repository, which may contain some fixes 17 | (whether to make it more machine-readable or to more accurately describe the language) which have not been upstreamed yet. 18 | 19 | 2. The grammar is then consumed by a custom program which attempts to automatically convert it as much as feasible into a tree-sitter grammar. 20 | This program and its output are located in [the `generated` branch](https://github.com/CyberShadow/tree-sitter-d/tree/generated). 21 | 22 | The first step of processing the grammar is to parse it. 23 | Thus, the grammar specification above is parsed into a DOM representing the document structure, with one node per DDoc macro. 24 | 25 | Though the canonical way to consume DDoc documents is to specify a file with custom macro definitions and to run DMD's DDoc macro processor using it, 26 | the approach used here was to implement a [simple DDoc parser](https://github.com/CyberShadow/tree-sitter-d/blob/master/generator/source/ddoc.d) instead 27 | (which also helped validate our assumptions about DDoc syntax). 28 | 29 | 3. The DDoc DOM is then converted to the initial grammar definition, which roughly corresponds to tree-sitter grammar structure. 30 | The conversion is done in the [parser](https://github.com/CyberShadow/tree-sitter-d/blob/master/generator/source/parser.d) module. 31 | 32 | 4. After conversion, the grammar passes through a few preprocessing steps. 
33 | These mold the grammar into a shape which is more useful for typical tree-sitter applications. 34 | 35 | The two main preprocessing steps are: 36 | 37 | - De-recursion, which converts definitions for lists of things from a recursive definition to one using explicit repetition. 38 | (Example: [`ImportList`](https://dlang.org/spec/module.html#ImportList)) 39 | 40 | - Body extraction, which splits some definitions into two, in which one is the definition "body" containing the operation actually described by the definition's name, 41 | and the other is a hidden rule which resolves either to the body or to the next operation with higher precedence. 42 | (Example: [`OrOrExpression`](https://dlang.org/spec/expression.html#OrOrExpression)) 43 | 44 | The grammar is then optimized to reduce redundancies manifested during preprocessing. 45 | 46 | 5. The grammar is now ready to be saved to `grammar.js`, the tree-sitter definition of the grammar. 47 | 48 | The latest version of this generated file can be found [in the root of the `generated` branch](https://github.com/CyberShadow/tree-sitter-d/blob/generated/grammar.js). 49 | 50 | 6. The generated file is not quite ready to be used, and requires some manual fixups. 51 | 52 | For this purpose, the `master` branch holds these fixes on top of the `generated` branch (which is merged into `master` regularly). 53 | 54 | You can see all manual fixes by [comparing the two branches](https://github.com/CyberShadow/tree-sitter-d/compare/generated..master#diff-919ac210accac9ecc55a76d10a7590e3d85ca3f0e165b52d30f08faee486d0cb). 55 | 56 | The `master` branch also hosts the test suite, as well as the [custom scanner](https://github.com/CyberShadow/tree-sitter-d/blob/master/src/scanner.cc), 57 | which implements D-specific syntax which cannot be described using the declarative tree-sitter grammar, such as nested comments or delimited string literals. 58 | 59 | 7. 
From this point, `grammar.js` is ready to be passed on to tree-sitter's build process, so the steps below simply describe how any tree-sitter grammar is compiled. 60 | 61 | `tree-sitter-cli` is used to generate the parser C source code from `grammar.js`. If installed via `npm` (i.e. `npm install`), this can be done by running: 62 | 63 | ``` 64 | ./node_modules/.bin/tree-sitter generate 65 | ``` 66 | 67 | This will populate the `src` directory, as well as create [additional build files](https://github.com/cybershadow/tree-sitter-d/blob/master/.gitignore#L4-L9). 68 | 69 | 8. Finally, the C source code is compiled into a loadable shared library, which can be directly used by a tree-sitter-enabled application. 70 | 71 | This step happens automatically when running `tree-sitter test`. 72 | Alternatively, invoking `tree-sitter build-wasm` builds a WebAssembly module instead of a native shared object. 73 | 74 | Contributing 75 | ------------ 76 | 77 | If you would like to help, please have a look at the [list of open issues](https://github.com/CyberShadow/tree-sitter-d/issues). 78 | 79 | If you spot an error in the grammar or the way it behaves and would like to fix it, the first step would be to identify the correct place to perform the fix. 80 | 81 | - If the problem is due to an incorrect grammar definition, and the error is also present in [the official specification](https://dlang.org/spec/spec.html), 82 | then please fix and send a pull request there. 83 | 84 | - Otherwise, if you believe that the problem is due to a translation error between the official grammar and the generated `grammar.js` file, 85 | then it may be due to a bug in [the generator program](https://github.com/CyberShadow/tree-sitter-d/tree/generated/generator). 
86 | 87 | - Finally, if the problem is tree-sitter specific or cannot be fixed through the above avenues, 88 | then the fix should be applied to [`grammar.js` on the master branch](https://github.com/CyberShadow/tree-sitter-d/blob/master/grammar.js). 89 | 90 | If you are having trouble with anything, please don't hesitate to [open an issue](https://github.com/CyberShadow/tree-sitter-d/issues/new). 91 | -------------------------------------------------------------------------------- /generator/.gitignore: -------------------------------------------------------------------------------- 1 | # Dub 2 | /.dub 3 | /generator 4 | 5 | # rdmd / dmd -i / rund 6 | /source/generator 7 | -------------------------------------------------------------------------------- /generator/dub.sdl: -------------------------------------------------------------------------------- 1 | name "generator" 2 | targetType "executable" 3 | dependency "ae" version="==0.0.3058" 4 | -------------------------------------------------------------------------------- /generator/dub.selections.json: -------------------------------------------------------------------------------- 1 | { 2 | "fileVersion": 1, 3 | "versions": { 4 | "ae": "0.0.3058" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /generator/source/ddoc.d: -------------------------------------------------------------------------------- 1 | module ddoc; 2 | 3 | import std.algorithm.comparison; 4 | import std.algorithm.searching; 5 | import std.ascii; 6 | import std.exception; 7 | import std.format; 8 | import std.functional; 9 | import std.string; 10 | 11 | import ae.utils.array; 12 | import ae.utils.meta; 13 | 14 | /// A DDoc DOM node 15 | struct Node 16 | { 17 | /// Node type 18 | enum Type 19 | { 20 | text, /// Verbatim inline text 21 | call, /// Macro call 22 | parameter, /// Placeholder for parameter in macro definition 23 | } 24 | Type type; /// ditto 25 | 26 | union 27 | { 28 | /// When type == 
Type.text 29 | string text; 30 | 31 | /// When type == Type.call 32 | struct Call 33 | { 34 | string macroName; /// The macro being called 35 | const(Node)[] contents; /// The arguments (comma-separated). 36 | 37 | /// Split `contents` into individual arguments. 38 | DDoc[] splitArguments() const 39 | { 40 | auto arguments = contents.split(','); 41 | // Remove the optional space after each , 42 | foreach (ref ddoc; arguments[1 .. $]) 43 | if (ddoc.length && ddoc[0].type == Node.Type.text) 44 | ddoc[0].text.skipOver(" "); 45 | return arguments; 46 | } 47 | 48 | /// Expand this macro call using the given definition. 49 | DDoc expand(const(Node)[] definition) const 50 | { 51 | auto arguments = splitArguments(); 52 | DDoc visit(const(Node)[] def) 53 | { 54 | DDoc result; 55 | foreach (defNode; def) 56 | final switch (defNode.type) 57 | { 58 | case Type.text: 59 | result ~= defNode; 60 | break; 61 | case Type.call: 62 | { 63 | Node node = defNode; 64 | node.call.contents = visit(defNode.call.contents); 65 | result ~= node; 66 | break; 67 | } 68 | case Type.parameter: 69 | switch (defNode.parameter) 70 | { 71 | case '1': 72 | .. 
73 | case '9': 74 | result ~= arguments.get(defNode.parameter - '1'); 75 | break; 76 | case '0': 77 | result ~= contents; 78 | break; 79 | default: 80 | throw new Exception("Don't understand macro parameter $" ~ defNode.parameter); 81 | } 82 | break; 83 | } 84 | return result; 85 | } 86 | return visit(definition); 87 | } 88 | } 89 | Call call; /// ditto 90 | 91 | /// When type == Type.parameter 92 | char parameter; 93 | } 94 | 95 | /// Helper getters 96 | bool isText (string text ) const { return type == Node.Type.text && this.text == text ; } 97 | bool isCallTo(string macroName) const { return type == Node.Type.call && this.call.macroName == macroName; } /// ditto 98 | 99 | string getSingleTextChild() const 100 | { 101 | enforce( 102 | type == Type.call && 103 | call.contents.length == 1 && 104 | call.contents[0].type == .Node.Type.text, 105 | "Macro does not have a single text child" 106 | ); 107 | return call.contents[0].text; 108 | } /// ditto 109 | 110 | bool isCallToEmpty(string macroName) const { return isCallTo(macroName) && !call.contents.length; } /// ditto 111 | 112 | void toString(scope void delegate(const(char)[]) sink) const 113 | { 114 | final switch (type) 115 | { 116 | case Type.text: sink.formattedWrite!"Node(%s, %(%s%))"(type, text.toArray); return; 117 | case Type.call: sink.formattedWrite!"%s"(call); return; 118 | case Type.parameter: sink.formattedWrite!"%s"(parameter); return; 119 | } 120 | } /// 121 | } 122 | 123 | /// A DDoc span is a list of root nodes. 
124 | alias DDoc = Node[]; 125 | 126 | private bool isMacroNameChar(char c) { return isAlphaNum(c) || c == '_'; } 127 | 128 | private DDoc parseDDocFragment(ref string s, bool topLevel) 129 | { 130 | DDoc ddoc; 131 | size_t parenDepth; 132 | bool verbatim; 133 | scope (success) enforce(!verbatim, "Unclosed code block"); 134 | 135 | while (true) 136 | { 137 | if (!s.length) 138 | { 139 | enforce(topLevel, "Unexpected end of file"); 140 | return ddoc; 141 | } 142 | 143 | switch (s[0]) 144 | { 145 | case '\n': 146 | if (s[1 .. $].findSplit("\n")[0].strip.I!(line => line.length >= 3 && line.representation.all!(c => c == '-'))) 147 | verbatim = !verbatim; 148 | goto default; 149 | 150 | case '$': 151 | { 152 | if (verbatim) goto default; 153 | Node node; 154 | if (s.length > 1 && s[1].among('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+')) 155 | { 156 | node.type = Node.Type.parameter; 157 | node.parameter = s[1]; 158 | s = s[2 .. $]; 159 | } 160 | else 161 | { 162 | // enforce(s[1 .. $].startsWith("("), "Expected macro call after $"); 163 | if (!s[1 .. $].startsWith("(")) goto default; 164 | node.type = Node.Type.call; 165 | s = s[2 .. $]; 166 | auto end = s.representation.countUntil!(not!isMacroNameChar); 167 | enforce(end > 0, "Expected macro name after $("); 168 | node.call.macroName = s[0 .. end]; 169 | if (s[end] == ' ') end++; // Skip the space after the macro name 170 | s = s[end .. $]; 171 | node.call.contents = parseDDocFragment(s, false); 172 | } 173 | ddoc ~= node; 174 | break; 175 | } 176 | 177 | case '(': 178 | if (verbatim) goto default; 179 | parenDepth++; 180 | goto default; 181 | 182 | case ')': 183 | if (verbatim) goto default; 184 | if (parenDepth) 185 | { 186 | parenDepth--; 187 | goto default; 188 | } 189 | enforce(!topLevel, "Mismatched )"); 190 | s = s[1 .. $]; 191 | return ddoc; 192 | 193 | default: 194 | if (!ddoc.length || ddoc[$-1].type != Node.Type.text) 195 | ddoc ~= Node(Node.Type.text); 196 | ddoc[$-1].text ~= s[0]; 197 | s = s[1 .. 
$]; 198 | break; 199 | } 200 | } 201 | 202 | assert(false); 203 | } 204 | 205 | /// A DDoc document. 206 | struct Document 207 | { 208 | DDoc contents; /// Document body. 209 | DDoc[string] macros; /// Local macro definitions. 210 | } 211 | 212 | /// Parse into DOM 213 | Document parseDDoc(string s) 214 | { 215 | auto os = s; 216 | scope(failure) 217 | { 218 | import std.stdio : stderr; 219 | stderr.writefln("Error at line %d:", 220 | 1 + os[0 .. $ - s.length].representation.count('\n'), 221 | ); 222 | } 223 | auto parts = s.findSplit("\nMacros:\n"); 224 | os = s = parts[0]; 225 | Document document; 226 | document.contents = parseDDocFragment(s, true); 227 | document.macros = parseMacros(parts[2]); 228 | return document; 229 | } 230 | 231 | /// Parse the "Macros:" section of a .dd file, or a .ddoc file. 232 | DDoc[string] parseMacros(string s) 233 | { 234 | DDoc[string] macros; 235 | 236 | size_t contentsStartPos, contentsEndPos, nameStartPos; 237 | bool maybeInName = true; 238 | string currentName; 239 | 240 | void flush(size_t endPos) 241 | { 242 | auto contents = s[contentsStartPos .. endPos]; 243 | if (!currentName) 244 | { 245 | enforce(contents.strip.length == 0, "Macro body without name"); 246 | return; 247 | } 248 | macros[currentName] = parseDDocFragment(contents, true); 249 | currentName = null; 250 | } 251 | 252 | size_t i; 253 | scope(failure) 254 | { 255 | import std.stdio : stderr; 256 | stderr.writefln("Error at line %d:", 257 | 1 + s[0 .. i].representation.count('\n'), 258 | ); 259 | } 260 | 261 | for (; i < s.length; i++) 262 | { 263 | auto c = s[i]; 264 | switch (c) 265 | { 266 | case '=': 267 | if (!maybeInName) 268 | goto default; 269 | maybeInName = false; 270 | flush(contentsEndPos); 271 | currentName = s[nameStartPos .. 
i].strip; 272 | contentsStartPos = i + 1; 273 | break; 274 | case '\n': 275 | contentsEndPos = i; 276 | nameStartPos = i + 1; 277 | maybeInName = true; 278 | break; 279 | default: 280 | if (!isWhite(c) && !isMacroNameChar(c)) 281 | maybeInName = false; 282 | break; 283 | } 284 | } 285 | flush(s.length); 286 | 287 | return macros; 288 | } 289 | 290 | /// Split `contents` by `delim`, like `std.string.split`. 291 | DDoc[] split(const DDoc contents, char delim) 292 | { 293 | DDoc slice(size_t startNodeIndex, size_t startOffset, size_t endNodeIndex, size_t endOffset) 294 | { 295 | DDoc result; 296 | foreach (nodeIndex; startNodeIndex .. endNodeIndex + (endOffset > 0)) 297 | { 298 | Node node = contents[nodeIndex]; 299 | if (nodeIndex == endNodeIndex && endOffset > 0) 300 | { 301 | assert(node.type == Node.Type.text); 302 | node.text = node.text[0 .. endOffset]; 303 | } 304 | if (nodeIndex == startNodeIndex && startOffset > 0) 305 | { 306 | assert(node.type == Node.Type.text); 307 | node.text = node.text[startOffset .. $]; 308 | } 309 | result ~= node; 310 | } 311 | return result; 312 | } 313 | 314 | DDoc[] result; 315 | size_t startNodeIndex = 0, startOffset = 0; 316 | foreach (nodeIndex, ref node; contents) 317 | if (node.type == Node.Type.text) 318 | foreach (offset; 0 .. node.text.length) 319 | if (node.text[offset] == delim) 320 | { 321 | result ~= slice(startNodeIndex, startOffset, nodeIndex, offset); 322 | 323 | startNodeIndex = nodeIndex; 324 | startOffset = offset; 325 | startOffset++; 326 | if (startOffset == node.text.length) 327 | { 328 | startNodeIndex++; 329 | startOffset = 0; 330 | } 331 | } 332 | result ~= slice(startNodeIndex, startOffset, contents.length, 0); 333 | return result; 334 | } 335 | 336 | /// Remove whitespace from around `d`, like `std.string.strip`. 337 | inout(DDoc) strip(/*DDoc*/inout(Node)[] d) 338 | { 339 | while (d.length && d[0].type == Node.Type.text) 340 | { 341 | auto s = d[0].text.stripLeft(); 342 | if (!s.length) 343 | d = d[1 .. 
$]; 344 | else 345 | { 346 | d = Node(Node.Type.text, s) ~ d[1 .. $]; 347 | break; 348 | } 349 | } 350 | while (d.length && d[$-1].type == Node.Type.text) 351 | { 352 | auto s = d[$-1].text.stripRight(); 353 | if (!s.length) 354 | d = d[0 .. $-1]; 355 | else 356 | { 357 | d = d[0 .. $-1] ~ Node(Node.Type.text, s); 358 | break; 359 | } 360 | } 361 | return d; 362 | } 363 | 364 | 365 | /// Converts to a string by replacing basic macros with their characters. 366 | string toString(in Node[] d) 367 | { 368 | string s; 369 | foreach (ref node; d) 370 | { 371 | if (node.type == Node.Type.text) 372 | s ~= node.text.replace(`\\`, `\`); 373 | else 374 | if (node.isCallToEmpty("AMP")) 375 | s ~= "&"; 376 | else 377 | if (node.isCallToEmpty("LT")) 378 | s ~= "<"; 379 | else 380 | if (node.isCallToEmpty("GT")) 381 | s ~= ">"; 382 | else 383 | if (node.isCallToEmpty("LPAREN")) 384 | s ~= "("; 385 | else 386 | if (node.isCallTo("RPAREN")) 387 | s ~= ")"; 388 | else 389 | if (node.isCallTo("BACKTICK")) 390 | s ~= "`"; 391 | else 392 | throw new Exception("Can't stringify: %s".format(node)); 393 | } 394 | return s; 395 | } 396 | 397 | private alias strip = std.string.strip; 398 | -------------------------------------------------------------------------------- /generator/source/generator.d: -------------------------------------------------------------------------------- 1 | import std.algorithm.comparison; 2 | import std.algorithm.iteration; 3 | import std.algorithm.searching; 4 | import std.array; 5 | import std.exception; 6 | import std.file; 7 | import std.path; 8 | import std.stdio; 9 | import std.string; 10 | 11 | import ae.utils.aa; 12 | import ae.utils.array; 13 | import ae.utils.funopt; 14 | import ae.utils.main; 15 | 16 | import ddoc; 17 | import grammar; 18 | import parser; 19 | import writer; 20 | 21 | enum dlangOrgPath = "dlang.org"; 22 | 23 | static immutable string[] extras = [ 24 | "WhiteSpace", 25 | "EndOfLine", 26 | "Comment", 27 | "SpecialTokenSequence", 28 | ]; 
29 | 30 | /// Entry point. 31 | void program() 32 | { 33 | if (!exists(dlangOrgPath) && exists("../" ~ dlangOrgPath)) 34 | chdir(".."); 35 | 36 | auto globalMacros = [ 37 | "dlang.org.ddoc", 38 | ] 39 | .map!(fn => dlangOrgPath.buildPath(fn)) 40 | .map!readText 41 | .map!parseMacros 42 | .array; 43 | 44 | string[] files; 45 | void scanTOC(const DDoc ddoc) 46 | { 47 | foreach (ref node; ddoc) 48 | if (node.type == Node.Type.call) 49 | { 50 | if (node.call.macroName == "ITEMIZE") 51 | files = node.call.splitArguments() 52 | .map!((DDoc argument) { 53 | argument = argument.strip(); 54 | enforce(argument.length == 1); 55 | enforce(argument[0].type == Node.Type.call); 56 | enforce(argument[0].call.macroName == "A"); 57 | auto href = argument[0].call.splitArguments()[0].strip; 58 | enforce(href.length == 1); 59 | enforce(href[0].type == Node.Type.text); 60 | enforce(href[0].text.endsWith(".html")); 61 | return href[0].text[0 .. $ - 5]; 62 | }) 63 | .filter!(name => !name.among("abi")) // Skip mangling definition 64 | .array; 65 | else 66 | scanTOC(node.call.contents); 67 | } 68 | } 69 | scanTOC(dlangOrgPath.buildPath("spec", "spec.dd").readText.parseDDoc.contents); 70 | enforce(files.length, "Failed to parse the table of contents (spec/spec.dd)"); 71 | 72 | Grammar grammar; 73 | string[][][string] order; 74 | 75 | foreach (file; files) 76 | { 77 | scope(failure) stderr.writeln("Error in file " ~ file ~ ":"); 78 | auto source = dlangOrgPath.buildPath("spec", file ~ ".dd").readText; 79 | auto ddoc = source.parseDDoc; 80 | 81 | if (source.indexOf(`$(GRAMMAR`) < 0) 82 | continue; 83 | 84 | void scan(ref const Node node) 85 | { 86 | if (node.type != Node.Type.call) 87 | return; 88 | scope(failure) stderr.writefln("Error on line %d:", 89 | 1 + source[0 .. 
source.sliceIndex(node.call.macroName)].representation.count('\n')); 90 | 91 | if (node.call.macroName == "GRAMMAR" || node.call.macroName == "GRAMMAR_LEX") 92 | { 93 | enforce(node.call.contents.length && 94 | node.call.contents[$-1].type == Node.Type.text && 95 | node.call.contents[$-1].isText("\n"), 96 | "Unexpected text at the end of GRAMMAR node" 97 | ); 98 | auto macros = (globalMacros ~ ddoc.macros).fold!merge((DDoc[string]).init); 99 | auto kind = node.call.macroName == "GRAMMAR" ? Grammar.Def.Kind.tokens : Grammar.Def.Kind.chars; 100 | auto newDefs = grammar.parse(node.call.contents, file, macros, kind); 101 | order[file] ~= newDefs; 102 | } 103 | else 104 | foreach (ref childNode; node.call.contents) 105 | scan(childNode); 106 | } 107 | foreach (ref node; ddoc.contents) 108 | scan(node); 109 | } 110 | 111 | grammar.defs["AsmStatement"].node = choice([ 112 | grammar.defs["AsmStatement"].node, 113 | reference("GccAsmStatement"), 114 | ]); 115 | 116 | grammar.analyze(["SourceFile"] ~ extras); 117 | 118 | foreach (defName; ["WhiteSpace", "EndOfLine"]) 119 | grammar.defs[defName].hidden = true; 120 | 121 | auto writer = Writer("../grammar.js", grammar, extras); 122 | 123 | foreach (file; files) 124 | { 125 | writer.startFile(file); 126 | foreach (section; order.get(file, null)) 127 | { 128 | writer.startSection(); 129 | foreach (def; section) 130 | writer.writeRule(def); 131 | } 132 | } 133 | 134 | writer.close(); 135 | } 136 | 137 | mixin main!(funopt!program); 138 | -------------------------------------------------------------------------------- /generator/source/grammar.d: -------------------------------------------------------------------------------- 1 | import std.algorithm.comparison; 2 | import std.algorithm.iteration; 3 | import std.algorithm.searching; 4 | import std.algorithm.sorting; 5 | import std.array; 6 | import std.exception; 7 | import std.format; 8 | import std.functional; 9 | import std.range; 10 | import std.sumtype; 11 | 12 | import 
ae.utils.aa; 13 | import ae.utils.meta; 14 | import ae.utils.text; 15 | 16 | import ddoc; 17 | 18 | static this() 19 | { 20 | if (false) 21 | { 22 | // Avoid https://issues.dlang.org/show_bug.cgi?id=22010 23 | // (or some similar bug) 24 | Grammar.Node node; 25 | auto b = node == node; 26 | } 27 | } 28 | 29 | struct Grammar 30 | { 31 | struct RegExp { string regexp; } /// Regular expression, generally with the intent to describe some character set. 32 | struct LiteralChars { string chars; } /// Describes contiguous characters (e.g. number syntax) 33 | struct LiteralToken { string literal; } /// May be surrounded by whitespace/comments 34 | struct Reference { string name; } /// Reference to another definition. 35 | struct Choice { Node[] nodes; } /// Choice of multiple possible nodes. 36 | struct Seq { Node[] nodes; } /// Consecutive sequence of nodes. 37 | // https://issues.dlang.org/show_bug.cgi?id=22010 38 | private mixin template OneNode() { Node[/*1*/] nodes; ref Node node() { assert(nodes.length == 1); return nodes[0]; } } 39 | struct Repeat { mixin OneNode; } /// Zero-or-more occurrences of the given node. 40 | struct Repeat1 { mixin OneNode; } /// One-or-more occurrences of the given node. 41 | struct Optional { mixin OneNode; } /// Zero-or-one occurrences of the given node. 42 | struct SeqChoice { Node[][] nodes; } /// Internal node, superset of Choice, Seq and Optional. `nodes` is a list of choices of sequences. 43 | 44 | // https://issues.dlang.org/show_bug.cgi?id=22003 45 | alias NodeValue = SumType!( 46 | RegExp, 47 | LiteralChars, 48 | LiteralToken, 49 | Reference, 50 | Choice, 51 | Repeat, 52 | Repeat1, 53 | Seq, 54 | Optional, 55 | SeqChoice, 56 | ); 57 | 58 | /// A grammar node. 
59 | struct Node 60 | { 61 | NodeValue value; 62 | alias value this; 63 | 64 | void toString(scope void delegate(const(char)[]) sink) 65 | { 66 | value.match!( 67 | (ref v) => sink.formattedWrite!"%s"(v), 68 | ); 69 | } 70 | 71 | void toString(scope void delegate(const(char)[]) sink) const 72 | { 73 | value.match!( 74 | (ref v) => sink.formattedWrite!"%s"(v), 75 | ); 76 | } 77 | } 78 | 79 | /// A grammar definition. 80 | /// Emitted as `name: $ => ...` 81 | struct Def 82 | { 83 | Node node; /// The root AST node. 84 | 85 | /// How to emit this definition in the grammar. 86 | enum Kind 87 | { 88 | tokens, /// As a regular rule. 89 | chars, /// As a token(...) rule. 90 | } 91 | Kind kind; /// ditto 92 | 93 | bool used; /// Include the definition in the generated grammar. 94 | bool hidden; /// Hide in the tree-sitter AST (by prefixing the name with _). 95 | bool synthetic; /// We made this one up - don't emit a dlang.org link. 96 | 97 | string publicName; /// If set, use this name instead of the `defs` key. 98 | string[] tail; /// Also write these (synthetic) rules after this one 99 | 100 | HashSet!string definedIn; /// Used to check if GLINK2 links are correct. 101 | } 102 | 103 | /// All definitions in the grammar, indexed by their official names. 104 | Def[string] defs; 105 | 106 | HashSet!(string[2]) links; /// Used to check if GLINK2 links are correct. 107 | 108 | /// Pre-process and prepare for writing 109 | void analyze(string[] roots) 110 | { 111 | checkReferences(); 112 | checkLinks(); 113 | normalize(); 114 | optimize(); 115 | deRecurse(); 116 | extractBodies(); 117 | checkKinds(); 118 | scanUsed(roots); 119 | scanHidden(); 120 | compile(); 121 | } 122 | 123 | // Ensure that all referenced grammar definitions are defined. 
124 | private void checkReferences() 125 | { 126 | void scan(Node node) 127 | { 128 | node.match!( 129 | (ref RegExp v) {}, 130 | (ref LiteralChars v) {}, 131 | (ref LiteralToken v) {}, 132 | (ref Reference v) { enforce(v.name in defs, "Unknown reference: " ~ v.name); }, 133 | (ref Choice v) { v.nodes .each!scan(); }, 134 | (ref Seq v) { v.nodes .each!scan(); }, 135 | (ref Repeat v) { v.nodes .each!scan(); }, 136 | (ref Repeat1 v) { v.nodes .each!scan(); }, 137 | (ref Optional v) { v.nodes .each!scan(); }, 138 | (ref SeqChoice v) { v.nodes.joiner.each!scan(); }, 139 | ); 140 | } 141 | foreach (name, ref def; defs) 142 | scan(def.node); 143 | } 144 | 145 | /// Ensure that GLINK2 destinations link to pages 146 | /// which actually contain the linked definitions. 147 | private void checkLinks() 148 | { 149 | foreach (pair; links.keys.sort) 150 | enforce(pair[0] in defs[pair[1]].definedIn, 151 | "Broken link to %s: links to page %s but it is defined in page(s) %-(%s, %)".format( 152 | pair[1], pair[0], defs[pair[1]].definedIn.byKey, 153 | )); 154 | } 155 | 156 | // Convert rules to an intermediate normalized form, which makes other manipulations easier. 157 | // In the normalized form, only the following nodes are allowed: 158 | // - Leaf nodes (RegExp, LiteralChars, LiteralToken) 159 | // - Reference 160 | // - SeqChoice 161 | // - Repeat1 162 | // Seq, Choice, and Optional are expressed as SeqChoice nodes. 163 | // Repeat is expressed as SeqChoice([[], [Repeat1(...)]]). 
164 | private void normalize() 165 | { 166 | void normalizeNode(ref Node node) 167 | { 168 | // Normalize children 169 | node.match!( 170 | (ref RegExp v) {}, 171 | (ref LiteralChars v) {}, 172 | (ref LiteralToken v) {}, 173 | (ref Reference v) {}, 174 | (ref Choice v) { v.nodes.each!normalizeNode(); }, 175 | (ref Seq v) { v.nodes.each!normalizeNode(); }, 176 | (ref Repeat v) { v.nodes.each!normalizeNode(); }, 177 | (ref Repeat1 v) { v.nodes.each!normalizeNode(); }, 178 | (ref Optional v) { v.nodes.each!normalizeNode(); }, 179 | (ref SeqChoice v) { unexpected(v); }, 180 | ); 181 | 182 | // Normalize node 183 | node = node.match!( 184 | (ref RegExp v) => node, 185 | (ref LiteralChars v) => node, 186 | (ref LiteralToken v) => node, 187 | (ref Reference v) => node, 188 | (ref Choice v) => seqChoice(v.nodes.map!((ref Node node) => node.match!( 189 | (ref RegExp v) => [[node]], 190 | (ref LiteralChars v) => [[node]], 191 | (ref LiteralToken v) => [[node]], 192 | (ref Reference v) => [[node]], 193 | (ref SeqChoice v) => v.nodes, 194 | (ref Repeat1 v) => [[node]], 195 | (ref _) => unexpected(_).progn(null), 196 | )).join), 197 | (ref Seq v) => seqChoice([v.nodes]), 198 | (ref Repeat v) => seqChoice([[], [repeat1(v.node)]]), 199 | (ref Repeat1 v) => node, 200 | (ref Optional v) => seqChoice([[], v.nodes]), 201 | (ref SeqChoice v) { unexpected(v); return Node.init; }, 202 | ); 203 | } 204 | 205 | foreach (defName, ref def; defs) 206 | normalizeNode(def.node); 207 | } 208 | 209 | // Extract the empty choice from a SeqChoice, if it has one. 210 | // If not, just return null and leave the argument unmodified. 211 | // The return value can then be appended to a choice list to 212 | // re-add the optional choice back in the tree. 213 | private Node[][] extractOptional(ref Node[][] choices) 214 | { 215 | foreach (i, choice; choices) 216 | if (!choice.length) 217 | { 218 | choices = choices[0 .. i] ~ choices[i + 1 .. 
$]; 219 | return [[]]; 220 | } 221 | return null; 222 | } 223 | 224 | // Optimize the given normalized node in-place. 225 | private void optimizeNode(ref Node node) 226 | { 227 | void optimizeNode(ref Node node) { Grammar.optimizeNode(node); } 228 | 229 | // Optimize children 230 | node.match!( 231 | (ref RegExp v) {}, 232 | (ref LiteralChars v) {}, 233 | (ref LiteralToken v) {}, 234 | (ref Reference v) {}, 235 | (ref SeqChoice v) { v.nodes.joiner.each!optimizeNode(); }, 236 | (ref Repeat1 v) { v.nodes .each!optimizeNode(); }, 237 | (ref _) { unexpected(_); }, 238 | ); 239 | 240 | // Replace unary SeqChoice nodes with their sole contents. 241 | node = node.match!( 242 | (ref SeqChoice v) => v.nodes.length == 1 && v.nodes[0].length == 1 ? v.nodes[0][0] : node, 243 | (ref _) => node, 244 | ); 245 | 246 | // Un-nest single-choice SeqChoice nodes. 247 | node.match!( 248 | (ref SeqChoice v) 249 | { 250 | foreach (ref choice; v.nodes) 251 | foreach_reverse (i; 0 .. choice.length) 252 | choice[i].match!( 253 | (ref SeqChoice v) 254 | { 255 | if (v.nodes.length == 1) // single-choice 256 | choice = choice[0 .. i] ~ v.nodes[0] ~ choice[i + 1 .. $]; 257 | }, 258 | (ref _) {} 259 | ); 260 | }, 261 | (ref _) {}, 262 | ); 263 | 264 | // Collapse redundantly-optional repetition into non-optional repetition. 265 | // x ( | repeat1(x) ) => repeat1(x) 266 | // ( | repeat1(x) ) x => repeat1(x) 267 | node.match!( 268 | (ref SeqChoice v) 269 | { 270 | foreach (ref choice; v.nodes) 271 | foreach_reverse (i; 0 .. 
choice.length) 272 | { 273 | if (i >= choice.length) 274 | continue; // Already optimized; cursor is outside new range 275 | choice[i].match!( 276 | (ref SeqChoice sc) 277 | { 278 | auto choices = sc.nodes; 279 | if (!extractOptional(choices)) 280 | return; 281 | if (choices.length != 1 || choices[0].length != 1) 282 | return; // Not single-choice (bar optional) or single-length 283 | 284 | choices[0][0].match!( 285 | (ref Repeat1 r) 286 | { 287 | // The list of repeating nodes to try to collapse 288 | auto span = r.node.match!( 289 | (ref SeqChoice scSpan) => scSpan.nodes.length == 1 ? scSpan.nodes[0] : r.nodes, 290 | (ref _) => r.nodes, 291 | ); 292 | 293 | if (choice[0 .. i].endsWith(span)) 294 | choice = choice[0 .. i - span.length] ~ choices[0] ~ choice[i + 1 .. $]; 295 | else 296 | if (choice[i + 1 .. $].startsWith(span)) 297 | choice = choice[0 .. i] ~ choices[0] ~ choice[i + 1 + span.length .. $]; 298 | }, 299 | (ref _) {} 300 | ); 301 | }, 302 | (ref _) {}, 303 | ); 304 | } 305 | }, 306 | (ref _) {}, 307 | ); 308 | 309 | // Given a SeqChoice, try to segment all of its choices such that the set 310 | // concatenation of the two sets containing each segment's halves is the exact set 311 | // of the original choices. This operation is more general than prefix/suffix 312 | // extraction. 313 | // ( a b | a c ) => a ( b | c ) 314 | // ( a b | b ) => ( | a ) b 315 | // a x | a y | b x | b y => ( a | b ) ( x | y ) 316 | node.match!( 317 | (ref SeqChoice sc) 318 | { 319 | auto choices = sc.nodes; 320 | choices = choices.map!flattenChoices.join; 321 | 322 | // Find all choices which have a chance of participating in segmentation. 323 | bool[] choiceViable = choices.map!(choice => 324 | // A choice is minimally viable if any of its constituent nodes occur 325 | // at least once somewhere else in the choice list. 
326 | choice.any!((ref Node node) => 327 | choices.map!(choice => 328 | choice.count(node) 329 | ).sum > 1 330 | ) 331 | || choice.length == 0 // Edge case 332 | ).array; 333 | 334 | if (choices.length.iota.filter!(i => choiceViable[i]).walkLength > 15) 335 | return; // Too slow :( 336 | 337 | // Precompute all minimally viable cut points for choices. 338 | bool[][] cutPosViable = choices.map!(choice => 339 | (choice.length + 1).iota.map!(pos => 340 | pos == 0 || pos == choice.length || // redundant / optimization 341 | choices.count!(choice2 => choice2.startsWith(choice[0 .. pos])) > 1 || 342 | choices.count!(choice2 => choice2.endsWith (choice[pos .. $])) > 1 343 | ).array 344 | ).array; 345 | 346 | // How to cut the choice at the given index. 347 | // -1 = doesn't participate in segmentation. 348 | auto cutPos = new sizediff_t[choices.length]; 349 | 350 | // The two sets, represented by the index of some 351 | // choice which is cut according to it. 352 | auto leftSet = new size_t[choices.length]; 353 | auto rightSet = new size_t[choices.length]; 354 | size_t leftSetSize, rightSetSize; 355 | 356 | alias leftChoices = () => leftSetSize.iota.map!(setIndex => 357 | leftSet[setIndex].I!(choiceIndex => 358 | choices[choiceIndex][0 .. cutPos[choiceIndex]] 359 | ) 360 | ); 361 | alias rightChoices = () => rightSetSize.iota.map!(setIndex => 362 | rightSet[setIndex].I!(choiceIndex => 363 | choices[choiceIndex][cutPos[choiceIndex] .. $] 364 | ) 365 | ); 366 | 367 | // Number of choices which do not participate in 368 | // segmentation. 369 | size_t numExcluded; 370 | 371 | // Avoid infinite recursion by only attempting to return (and re-optimize) 372 | // a solution that is better than the status quo. 373 | alias nodeScore = delegate size_t (ref Node node) => node.match!( 374 | (ref SeqChoice sc) => sc.nodes.map!(choice => choice.map!nodeScore.sum).sum, 375 | (ref _) => 1, 376 | ); 377 | 378 | // Best solution found. 
379 | size_t bestScore = nodeScore(node); 380 | Node bestNode; 381 | 382 | // Use classic recursive backtracking to iterate 383 | // through all possible valid solutions 384 | void search(size_t choiceIndex) 385 | { 386 | // If the cardinality of the set concatenation exceeds the 387 | // size of the input set, then it certainly contains strings 388 | // which are not part of the input set. 389 | if (leftSetSize * rightSetSize > choices.length - numExcluded) 390 | return; 391 | 392 | // Disallowing either set to grow larger than |choices|/2 greatly 393 | // reduces the execution time, but prevents this algorithm from 394 | // performing basic prefix/suffix extraction. Currently we don't need 395 | // the optimization. 396 | version (none) 397 | if (leftSetSize > (choices.length - numExcluded) / 2 || 398 | rightSetSize > (choices.length - numExcluded) / 2) 399 | return; 400 | 401 | if (choiceIndex < choices.length) 402 | { 403 | auto choice = choices[choiceIndex]; 404 | 405 | // Try segmenting the choice at every viable point 406 | if (choiceViable[choiceIndex]) 407 | foreach_reverse (pos; 0 .. choice.length + 1) 408 | { 409 | if (!cutPosViable[choiceIndex][pos]) 410 | continue; 411 | 412 | cutPos[choiceIndex] = pos; 413 | 414 | auto left = choice[0 .. pos]; 415 | auto right = choice[pos .. 
$]; 416 | 417 | bool inLeftSet = leftChoices().canFind(left); 418 | bool inRightSet = rightChoices().canFind(right); 419 | if (!inLeftSet) 420 | leftSet[leftSetSize++] = choiceIndex; 421 | if (!inRightSet) 422 | rightSet[rightSetSize++] = choiceIndex; 423 | search(choiceIndex + 1); 424 | if (!inLeftSet) 425 | leftSetSize--; 426 | if (!inRightSet) 427 | rightSetSize--; 428 | } 429 | 430 | // Also try excluding this choice from segmentation 431 | cutPos[choiceIndex] = -1; 432 | numExcluded++; 433 | search(choiceIndex + 1); 434 | numExcluded--; 435 | } 436 | else 437 | { 438 | // scope(failure) 439 | // { 440 | // import std.stdio; 441 | // writeln("Inputs:"); 442 | // foreach (i, choice; choices) 443 | // if (cutPos[i] == -1) 444 | // writeln("- ", choice, " (EXCLUDED)"); 445 | // else 446 | // writeln("- ", choice[0 .. cutPos[i]], " | ", choice[cutPos[i] .. $]); 447 | // writeln("Left set:"); 448 | // foreach (choice; leftChoices()) 449 | // writeln("- ", choice); 450 | // writeln("Right set:"); 451 | // foreach (choice; rightChoices()) 452 | // writeln("- ", choice); 453 | // writefln("Total: %d Excluded: %d Segmented: %d", choices.length, numExcluded, choices.length - numExcluded); 454 | // writeln(); 455 | // writeln(); 456 | // } 457 | 458 | if (numExcluded == choices.length) 459 | return; // Degenerate case - all choices are excluded 460 | if (leftChoices().equal([[]]) || rightChoices().equal([[]])) 461 | return; // Degenerate case - extracting empty prefix/suffix 462 | 463 | // The set concatenation (pair-wise concatenation of Cartesian 464 | // product) of the two sets must result in the original full set 465 | // of choices. 466 | if (leftSetSize * rightSetSize + numExcluded != choices.length) 467 | return; 468 | 469 | size_t score; 470 | foreach (ci; 0 .. 
choices.length) 471 | if (cutPos[ci] == -1) 472 | score += choices[ci].length; 473 | foreach (choice; leftChoices()) 474 | score += choice.length; 475 | foreach (choice; rightChoices()) 476 | score += choice.length; 477 | 478 | if (score < bestScore) 479 | { 480 | bestScore = score; 481 | 482 | // Excluded choices 483 | auto newChoices = choices.length.iota 484 | .filter!(choiceIndex => cutPos[choiceIndex] == -1) 485 | .map!(choiceIndex => choices[choiceIndex]) 486 | .array; 487 | // Container for the two sets 488 | auto container = seqChoice([[ 489 | seqChoice(leftChoices().array), 490 | seqChoice(rightChoices().array), 491 | ]]); 492 | // Insert the container choice at the first occurrence of a 493 | // refactored choice 494 | auto insertPos = cutPos.countUntil!(pos => pos >= 0); 495 | if (insertPos < 0) 496 | insertPos = 0; 497 | newChoices = newChoices[0 .. insertPos] ~ [container] ~ newChoices[insertPos .. $]; 498 | bestNode = seqChoice(newChoices); 499 | assert(nodeScore(bestNode) == score); 500 | } 501 | } 502 | } 503 | search(0); 504 | 505 | assert(numExcluded == 0 && leftSetSize == 0 && rightSetSize == 0); 506 | 507 | if (bestNode !is Node.init) 508 | { 509 | // Apply solution 510 | node = bestNode; 511 | optimizeNode(node); 512 | } 513 | }, 514 | (ref _) {} 515 | ); 516 | 517 | // Lift the common part (prefix or suffix) out of SeqChoice choices, e.g, transform: 518 | // x | x a | x b | ... => x ( | a | b | ... ) 519 | // We do this if at least two choices have a non-empty common prefix or suffix, 520 | // for every such possible prefix / suffix. 521 | node.match!( 522 | (ref SeqChoice scNode) 523 | { 524 | auto choices = scNode.nodes; 525 | 526 | if (choices.length < 2) 527 | return; // Must have at least two choices 528 | 529 | size_t bestCount; 530 | 531 | foreach (pass; [1, 2]) // Do a first pass to find the biggest group 532 | foreach (i1; 0 .. choices.length) 533 | foreach (i2; i1 + 1 .. 
choices.length) 534 | { 535 | auto choice1 = choices[i1]; 536 | auto choice2 = choices[i2]; 537 | auto prefix = commonPrefix(choice1 , choice2 ) ; 538 | auto suffix = commonPrefix(choice1.retro, choice2.retro).retro; 539 | if (prefix.length || suffix.length) 540 | { 541 | alias indexIsGrouped = i => 542 | choices[i].startsWith(prefix) && 543 | choices[i].endsWith(suffix) && 544 | choices[i].length >= prefix.length + suffix.length; 545 | auto groupedIndices = choices.length.iota.filter!indexIsGrouped.array; 546 | if (groupedIndices.length < 2) 547 | continue; 548 | 549 | if (pass == 1) 550 | bestCount = max(bestCount, groupedIndices.length); 551 | else 552 | if (groupedIndices.length == bestCount) 553 | { 554 | auto remainingIndices = choices.length.iota.filter!(not!indexIsGrouped); 555 | // auto groupedChoices = groupedIndices.map!(i => choices[i]); 556 | 557 | auto newChoices = remainingIndices.map!(i => choices[i]).array; 558 | // Insert the new group at the first occurrence of the prefix/suffix 559 | auto insertionPoint = groupedIndices.front; 560 | newChoices = 561 | newChoices[0 .. insertionPoint] ~ 562 | chain( 563 | prefix, 564 | seqChoice( 565 | groupedIndices.map!(i => choices[i][prefix.length .. $ - suffix.length]).array 566 | ).only, 567 | suffix, 568 | ).array.only.array ~ 569 | newChoices[insertionPoint .. $]; 570 | 571 | node = seqChoice(newChoices); 572 | optimizeNode(node); 573 | return; 574 | } 575 | } 576 | } 577 | }, 578 | (ref _) {}, 579 | ); 580 | } 581 | 582 | // Fold away unnecessary grammar nodes, simplify the node tree, 583 | // and otherwise prepare it for the transformations to follow. 584 | private void optimize() 585 | { 586 | foreach (ref def; defs) 587 | optimizeNode(def.node); 588 | } 589 | 590 | // Name-based heuristic to decide which nodes to perform 591 | // de-recursion / body-extraction for. 592 | private bool isPlural(string defName) 593 | { 594 | return 595 | // Lists of things generally involve repetition. 
596 | defName.splitByCamelCase.canFind("List") || 597 | 598 | // If the definition name is the plural of the name of another definition, 599 | // then this is almost certainly used for repetition. 600 | ["s", "es"].any!(pluralSuffix => 601 | defName.endsWith(pluralSuffix) && 602 | ["", "Name"].any!(singularSuffix => 603 | defName[0 .. $ - pluralSuffix.length] ~ singularSuffix in defs 604 | ) 605 | ); 606 | } 607 | 608 | // Attempt to remove recursion as needed 609 | private void deRecurse() 610 | { 611 | foreach (defName, ref def; defs) 612 | { 613 | // In the D grammar, recursion is used for two purposes: 614 | // - Repetition (e.g. Characters) 615 | // - Nested constructs (e.g. binary expressions) 616 | // We only want to de-recurse the first kind. 617 | bool shouldDeRecurse = 618 | 619 | // We must always de-recurse token fragments, 620 | // because we can't use tree-sitter recursion with them. 621 | def.kind == Def.Kind.chars || 622 | 623 | // Lists of things generally involve repetition. 624 | isPlural(defName) || 625 | 626 | // Additional rules. 
627 | defName.among( 628 | "ParameterAttributes", 629 | "AsmInstruction", 630 | ); 631 | 632 | if (shouldDeRecurse) 633 | { 634 | auto x = reference(defName); 635 | 636 | // Transform x := a | b | c x into x := ( | ( c )+ ) ( a | b ) 637 | def.node.match!( 638 | (ref SeqChoice sc1) 639 | { 640 | auto choices = sc1.nodes; 641 | choices = choices.map!flattenChoices.join; 642 | 643 | auto recursiveChoiceIndices = choices.length.iota.filter!( 644 | i => choices[i].canFind(x), 645 | ).array; 646 | if (recursiveChoiceIndices.length != 1) 647 | return; // Single path to recursion 648 | auto recursiveChoiceIndex = recursiveChoiceIndices.front; 649 | auto recursiveChoice = choices[recursiveChoiceIndex]; 650 | if (recursiveChoice.countUntil(x) + 1 != recursiveChoice.length) 651 | return; // More rules follow after recursion 652 | 653 | def.node = seqChoice([[ 654 | // Recursive part 655 | seqChoice([ 656 | [], // Optional (zero-or-more) 657 | [repeat1(seqChoice([ 658 | recursiveChoice[0 .. $ - 1] 659 | ]))], 660 | ]), 661 | // Non-recursive parts 662 | seqChoice( 663 | choices[0 .. recursiveChoiceIndex] ~ choices[recursiveChoiceIndex + 1 .. $], 664 | ), 665 | ]]); 666 | optimizeNode(def.node); 667 | }, 668 | (_) {} 669 | ); 670 | 671 | // Transform x := y ( | z x ) into x := y ( | ( z y )+ ) 672 | def.node.match!( 673 | (ref SeqChoice sc1) 674 | { 675 | if (sc1.nodes.length != 1) 676 | return; // Single choice 677 | if (sc1.nodes[0].length < 2) 678 | return; 679 | 680 | auto y = sc1.nodes[0][0 .. $-1]; 681 | 682 | sc1.nodes[0][$-1].match!( 683 | (ref SeqChoice sc2) 684 | { 685 | 686 | auto choices = sc2.nodes; 687 | if (!extractOptional(choices)) 688 | return; 689 | if (choices.length != 1) 690 | return; 691 | if (choices[0][$-1] != x) 692 | return; 693 | 694 | auto z = choices[0][0 .. 
$-1]; 695 | 696 | def.node = seqChoice([ 697 | y ~ 698 | seqChoice([ 699 | [], // optional 700 | [repeat1( 701 | seqChoice([ 702 | z ~ 703 | y, 704 | ]) 705 | )], 706 | ]), 707 | ]); 708 | optimizeNode(def.node); 709 | }, 710 | (_) {} 711 | ); 712 | }, 713 | (_) {} 714 | ); 715 | 716 | // Transform x := ( | x z ) y into x := ( | ( y z )+ ) y 717 | // Same as above, but in the other direction. 718 | def.node.match!( 719 | (ref SeqChoice sc1) 720 | { 721 | if (sc1.nodes.length != 1) 722 | return; // Single choice 723 | if (sc1.nodes[0].length < 2) 724 | return; 725 | 726 | auto y = sc1.nodes[0][1 .. $]; 727 | 728 | sc1.nodes[0][0].match!( 729 | (ref SeqChoice sc2) 730 | { 731 | auto choices = sc2.nodes; 732 | if (!extractOptional(choices)) 733 | return; 734 | if (choices.length != 1) 735 | return; 736 | if (choices[0][0] != x) 737 | return; 738 | 739 | auto z = choices[0][1 .. $]; 740 | 741 | def.node = seqChoice([ 742 | seqChoice([ 743 | [], // optional 744 | [repeat1( 745 | seqChoice([ 746 | y ~ 747 | z, 748 | ]) 749 | )], 750 | ]) ~ 751 | y, 752 | ]); 753 | optimizeNode(def.node); 754 | }, 755 | (_) {} 756 | ); 757 | }, 758 | (_) {} 759 | ); 760 | } 761 | } 762 | } 763 | 764 | // Recursively expand all nested choices into a flat list of all possible combinations. 765 | // This form is used for some transformations. 766 | private static Node[][] flattenChoices(Node[] nodes) 767 | { 768 | foreach (i, ref node; nodes) 769 | { 770 | auto result = node.match!( 771 | (ref SeqChoice sc) 772 | { 773 | assert(sc.nodes.length > 1); 774 | Node[][] result; 775 | foreach (choice; sc.nodes.map!flattenChoices.joiner) 776 | foreach (rightChoice; flattenChoices(nodes[i + 1 .. $])) 777 | result ~= nodes[0 .. 
i] ~ choice ~ rightChoice; 778 | return result; 779 | }, 780 | (ref _) => null, 781 | ); 782 | if (result) 783 | return result; 784 | } 785 | return [nodes]; 786 | } 787 | 788 | // Refactor some definitions into a descending part and an 789 | // implementation part, so that we can hide the descending 790 | // part to avoid excessive nesting in the tree-sitter AST. 791 | // This aims to solve the problem described in 792 | // http://tree-sitter.github.io/tree-sitter/creating-parsers#structuring-rules-well , 793 | // though using a different approach. 794 | private void extractBodies() 795 | { 796 | foreach (defName; defs.keys) 797 | { 798 | auto def = &defs[defName]; 799 | 800 | if (def.kind != Def.Kind.tokens) 801 | continue; 802 | 803 | // The rule of thumb to decide whether a rule should have its body extracted 804 | // is to see if the rule name makes sense even with just the minimal, 805 | // non-body interpretation of the definition. 806 | // E.g., an AddExpression is expected to always have an addition, 807 | // but an Import is an import even without a ModuleAliasIdentifier. 808 | 809 | // The following grammar definitions are eligible for body extraction, 810 | // but it doesn't make sense to do so for them. 811 | // As far as I can see, there is no way to mechanically distinguish these cases 812 | // from the majority of cases where body extraction is desirable. 813 | if (defName.among( 814 | "SourceFile", 815 | "Import", 816 | "Slice", // needs to be de-recursed 817 | "Symbol", 818 | "AssertArguments", // uses AssignExpression 819 | )) 820 | continue; 821 | 822 | // One way we can decide whether to perform body 823 | // extraction is to check if one of the choices that the 824 | // definition can resolve to is a reference to a very 825 | // generic rule, such as Identifier. In this case, it is 826 | // generally valuable to preserve this node in the AST, as 827 | // it provides information over the generic rule. 
828 | bool wrapsGeneric = def.node.match!( 829 | (ref SeqChoice sc) => sc.nodes.map!flattenChoices.joiner.any!(choice => 830 | choice.length == 1 && choice[0].match!( 831 | (ref Reference r) => r.name.among( 832 | "Identifier", 833 | "DeclDefs", 834 | "NonVoidInitializer", 835 | // "AssignExpression", // Also used for descending 836 | "BasicType", 837 | "Parameters", 838 | "InOutStatement", 839 | "IntegerLiteral", 840 | "Declaration", 841 | "BlockStatement", 842 | "Type", 843 | "Opcode", 844 | ), 845 | (ref _) => false, 846 | )), 847 | (ref _) => false, 848 | ); 849 | if (wrapsGeneric) 850 | continue; 851 | 852 | // Another heuristic we can use is to check if the name 853 | // suggests repetition. An example is Packages: it is 854 | // recursive, but unlike e.g. OrOrExpression (which is 855 | // also recursive), we don't want to perform body 856 | // extraction on it. 857 | if (isPlural(defName)) 858 | continue; 859 | 860 | auto x = reference(defName); 861 | 862 | /* 863 | x := y ( | a... | b... ) z 864 | => 865 | x := y z | x_ts_body 866 | x_ts_body := y ( a... | b... ) z 867 | 868 | - y and z are the mandatory descending part (must be references) 869 | - a, b, ... are the implementation part, which we will extract to a separate rule 870 | These should contain a token or such (i.e. consist of not just all references). 871 | */ 872 | def.node.match!( 873 | (ref SeqChoice sc1) 874 | { 875 | if (sc1.nodes.length != 1) 876 | return; // Single choice 877 | 878 | auto optionalIndex = 879 | sc1.nodes[0].countUntil!((ref Node node) => node.match!( 880 | (ref SeqChoice sc2) => sc2.nodes.canFind(null), 881 | (ref _) => false 882 | )); 883 | if (optionalIndex < 0) 884 | return; 885 | 886 | auto y = sc1.nodes[0][0 .. optionalIndex]; 887 | auto z = sc1.nodes[0][optionalIndex + 1 .. 
$]; 888 | auto y_z = y ~ z; 889 | if (y_z.length != 1) // Match logic in scanHidden 890 | return; 891 | bool yzOK = y_z.all!((ref Node node) => node.match!( 892 | (ref Reference v) => true, 893 | (ref _) => false, 894 | )); 895 | if (!yzOK) 896 | return; 897 | 898 | auto choices = sc1.nodes[0][optionalIndex].tryMatch!( 899 | (ref SeqChoice sc2) => sc2.nodes, 900 | ); 901 | extractOptional(choices).enforce(); 902 | alias choicesOK = delegate bool (choices) => choices.all!(choice => choice.any!((ref Node node) => node.match!( 903 | (ref RegExp v) => true, 904 | (ref LiteralChars v) => true, 905 | (ref LiteralToken v) => true, 906 | (ref SeqChoice v) => choicesOK(v.nodes), 907 | (ref _) => false, 908 | ))); 909 | if (!choicesOK(choices)) 910 | return; 911 | 912 | auto bodyName = defName ~ "TSBody"; 913 | def.node = seqChoice([ 914 | y_z, 915 | [reference(bodyName)], 916 | ]); 917 | def.tail ~= bodyName; 918 | def.publicName = "Maybe" ~ (def.publicName ? def.publicName : defName); 919 | 920 | Def bodyDef; 921 | bodyDef.node = seqChoice([y ~ seqChoice(choices) ~ z]); 922 | bodyDef.kind = Def.Kind.tokens; 923 | bodyDef.synthetic = true; 924 | bodyDef.publicName = defName; 925 | 926 | optimizeNode(def.node); 927 | optimizeNode(bodyDef.node); 928 | 929 | defs[bodyName] = bodyDef; 930 | }, 931 | (ref _) {} 932 | ); 933 | 934 | /* 935 | x := choice( 936 | // Some choices are references (descending part) 937 | reference(...), 938 | reference(...), 939 | 940 | // Some choices are sequences (implementation part) 941 | seq(...), 942 | seq(...), 943 | ) 944 | 945 | => 946 | 947 | x := choice( 948 | reference(...), 949 | reference(...), 950 | reference(x_ts_body), 951 | ) 952 | 953 | x_ts_body := choice( 954 | seq(...), 955 | seq(...), 956 | ) 957 | */ 958 | def.node.match!( 959 | (ref SeqChoice sc1) 960 | { 961 | auto choices = sc1.nodes; 962 | choices = choices.map!flattenChoices.join; 963 | 964 | alias isReference = (Node[] nodes) => nodes.length == 1 && nodes[0].match!( 965 | 
(ref Reference v) => true, 966 | (_) => false, 967 | ); 968 | auto references = choices.filter!isReference.array; 969 | auto remainder = choices.filter!(not!isReference).array; 970 | if (!references || !remainder) 971 | return; 972 | 973 | auto bodyName = defName ~ "TSBody"; 974 | def.node = seqChoice( 975 | references ~ 976 | [reference(bodyName)], 977 | ); 978 | def.tail ~= bodyName; 979 | def.publicName = "Maybe" ~ (def.publicName ? def.publicName : defName); 980 | 981 | Def bodyDef; 982 | bodyDef.node = seqChoice( 983 | remainder, 984 | ); 985 | bodyDef.kind = Def.Kind.tokens; 986 | bodyDef.synthetic = true; 987 | bodyDef.publicName = defName; 988 | 989 | optimizeNode(def.node); 990 | optimizeNode(bodyDef.node); 991 | 992 | defs[bodyName] = bodyDef; 993 | }, 994 | (ref _) {} 995 | ); 996 | } 997 | } 998 | 999 | // Verify our assertions about definitions of the respective kind. 1000 | private void checkKinds() 1001 | { 1002 | foreach (defName, ref def; defs) 1003 | final switch (def.kind) 1004 | { 1005 | case Def.Kind.chars: 1006 | { 1007 | enum State : ubyte 1008 | { 1009 | hasChars = 1 << 0, 1010 | hasToken = 1 << 1, 1011 | recurses = 1 << 2, 1012 | } 1013 | 1014 | HashSet!string scanning; 1015 | 1016 | State checkDef(string defName) 1017 | { 1018 | scope(failure) { import std.stdio; stderr.writefln("While checking %s:", defName); } 1019 | if (defName in scanning) 1020 | return State.recurses; 1021 | scanning.add(defName); 1022 | scope(success) scanning.remove(defName); 1023 | 1024 | State concat(State a, State b) 1025 | { 1026 | if (((a & State.hasToken) && b != 0) || 1027 | ((b & State.hasToken) && a != 0)) 1028 | throw new Exception("Token / token fragment definition %s contains mixed %s and %s".format(defName, a, b)); 1029 | return a | b; 1030 | } 1031 | 1032 | State scanNode(ref Node node) 1033 | { 1034 | return node.match!( 1035 | (ref RegExp v) => State.init, 1036 | (ref LiteralChars v) => State.hasChars, 1037 | (ref LiteralToken v) => State.hasToken, 
1038 | (ref Reference v) { enforce(defs[v.name].kind == Def.Kind.chars, "%s of kind %s references %s of kind %s".format(defName, def.kind, v.name, defs[v.name].kind)); return checkDef(v.name); }, 1039 | (ref Repeat1 v) => v.node.I!scanNode().I!(x => concat(x, x)), 1040 | (ref SeqChoice v) => v.nodes.map!(choiceSeq => choiceSeq.map!scanNode().fold!concat(State.init)).fold!((a, b) => State(a | b)), 1041 | (ref _) { unexpected(_); return State.init; }, 1042 | ); 1043 | } 1044 | return scanNode(defs[defName].node); 1045 | } 1046 | 1047 | checkDef(defName); 1048 | break; 1049 | } 1050 | 1051 | case Def.Kind.tokens: 1052 | { 1053 | void scanNode(ref Node node) 1054 | { 1055 | node.match!( 1056 | (ref RegExp v) {}, 1057 | (ref LiteralChars v) { throw new Exception("Definition %s with kind %s has literal chars: %(%s%)".format(defName, def.kind, [v.chars])); }, 1058 | (ref LiteralToken v) {}, 1059 | (ref Reference v) {}, 1060 | (ref Repeat1 v) { v.nodes .each!scanNode(); }, 1061 | (ref SeqChoice v) { v.nodes.joiner.each!scanNode(); }, 1062 | (ref _) { unexpected(_); }, 1063 | ); 1064 | } 1065 | scanNode(def.node); 1066 | break; 1067 | } 1068 | } 1069 | } 1070 | 1071 | // Recursively visit definitions starting from `roots` to find 1072 | // which ones are used and should be generated grammar. 
// Marks (via `Def.used`) every definition reachable from `roots`.
// Throws: an Exception (via `enforce`) if a visited name is not in `defs`.
private void scanUsed(string[] roots)
{
    void scanDef(string defName)
    {
        // Report an unknown name (e.g. a mistyped root) with a message
        // instead of letting the AA lookup abort with a RangeError.
        auto def = defName in defs;
        enforce(def !is null, "Unknown definition: " ~ defName);

        if (def.used)
            return; // Already visited; this also terminates recursive rules
        def.used = true;
        if (def.kind == Def.Kind.chars)
            return; // Referencees will be inlined

        void scanNode(ref Node node)
        {
            node.match!(
                (ref RegExp       v) {},
                (ref LiteralChars v) {},
                (ref LiteralToken v) {},
                (ref Reference    v) { scanDef(v.name); },
                (ref Repeat1      v) { v.nodes       .each!scanNode(); },
                (ref SeqChoice    v) { v.nodes.joiner.each!scanNode(); },
                (ref _) { unexpected(_); },
            );
        }
        scanNode(def.node);
    }

    foreach (root; roots)
        scanDef(root);
}

// Choose which definitions should be hidden (inlined) in the tree-sitter AST.
// In the generated grammar, such definitions' names begin with an underscore.
private void scanHidden()
{
    foreach (defName, ref def; defs)
    {
        if (def.kind == Def.Kind.chars)
            continue; // Always represents a token; referencees are inlined

        // We make a definition hidden if it always contains at most one other definition.
        // Definitions which directly contain tokens are never hidden.

        // Exception: nodes which contain only one reference and nothing else
        // are implicitly understood to have semantic meaning, and are not hidden.
        if (def.node.match!(
                (ref Reference v) => true,
                (ref _) => false,
            ))
            continue;

        // Upper bound on how many child nodes this subtree can contribute:
        // a reference counts as 1, a token as 2 (so that it alone already
        // prevents hiding), a sequence is the sum of its items, and a choice
        // is the largest of its alternatives.
        size_t scanNode(ref Node node)
        {
            return node.match!(
                (ref RegExp       v) => unexpected(v).progn(0),
                (ref LiteralChars v) => unexpected(v).progn(0),
                (ref LiteralToken v) => 2,
                (ref Reference    v) => 1,
                // A repetition may hold its contents more than once, so they
                // count double. The children are summed explicitly: `each`
                // only reports whether iteration completed, not what the
                // callback returned, so multiplying its result is meaningless.
                (ref Repeat1      v) => v.nodes.map!scanNode().sum() * 2,
                (ref SeqChoice    v) => v.nodes.map!(choiceSeq => choiceSeq.map!scanNode().sum()).reduce!max,
                (ref _) => unexpected(_).progn(0),
            );
        }
        def.hidden = scanNode(def.node) <= 1;
    }
}

// Convert rules from the internal normalized form to the tree-sitter form.
// This replaces SeqChoice nodes with Seq / Choice / Optional.
private void compile()
{
    void compileNode(ref Node node)
    {
        // Compile children
        // (only normalized node kinds may still be present at this point)
        node.match!(
            (ref RegExp       v) {},
            (ref LiteralChars v) {},
            (ref LiteralToken v) {},
            (ref Reference    v) {},
            (ref Choice       v) { unexpected(v); },
            (ref Seq          v) { unexpected(v); },
            (ref Repeat       v) { unexpected(v); },
            (ref Repeat1      v) { v.nodes       .each!compileNode(); },
            (ref Optional     v) { unexpected(v); },
            (ref SeqChoice    v) { v.nodes.joiner.each!compileNode(); },
        );

        // Compile node
        node = node.match!(
            (ref RegExp       v) => node,
            (ref LiteralChars v) => node,
            (ref LiteralToken v) => node,
            (ref Reference    v) => node,
            (ref SeqChoice    v)
            {
                // Removes the empty alternative (if any) from v.nodes;
                // a non-null result means the whole node is optional.
                auto optionalChoice = extractOptional(v.nodes);

                // A one-item sequence is emitted as the item itself.
                alias maybeSeq = (Node[] nodes) => nodes.length == 1 ? nodes[0] : seq(nodes);

                // A single remaining alternative needs no choice(...) wrapper.
                node = v.nodes.length == 1 ? maybeSeq(v.nodes[0]) : choice(v.nodes.map!maybeSeq.array);

                if (optionalChoice)
                {
                    // optional(repeat1(...)) -> repeat(...)
                    node = node.match!(
                        (ref Repeat1 v) => repeat(v.node),
                        (ref _) => optional(node),
                    );
                }

                return node;
            },
            (ref Repeat1      v) => node,
            (ref _) { unexpected(_); return Node.init; },
        );
    }

    foreach (defName, ref def; defs)
        compileNode(def.node);
}
} // Closes the enclosing Grammar aggregate (its header lies before this chunk).

/// Convenience factory functions.
Grammar.Node regexp      (string           regexp ) { return Grammar.Node(Grammar.NodeValue(Grammar.RegExp      ( regexp  ))); }
Grammar.Node literalChars(string           chars  ) { return Grammar.Node(Grammar.NodeValue(Grammar.LiteralChars( chars   ))); } /// ditto
Grammar.Node literalToken(string           literal) { return Grammar.Node(Grammar.NodeValue(Grammar.LiteralToken( literal ))); } /// ditto
Grammar.Node reference   (string           name   ) { return Grammar.Node(Grammar.NodeValue(Grammar.Reference   ( name    ))); } /// ditto
Grammar.Node choice      (Grammar.Node[]   nodes  ) { return Grammar.Node(Grammar.NodeValue(Grammar.Choice      ( nodes   ))); } /// ditto
Grammar.Node seq         (Grammar.Node[]   nodes  ) { return Grammar.Node(Grammar.NodeValue(Grammar.Seq         ( nodes   ))); } /// ditto
Grammar.Node repeat      (Grammar.Node     node   ) { return Grammar.Node(Grammar.NodeValue(Grammar.Repeat      ([node   ]))); } /// ditto
Grammar.Node repeat1     (Grammar.Node     node   ) { return Grammar.Node(Grammar.NodeValue(Grammar.Repeat1     ([node   ]))); } /// ditto
Grammar.Node optional    (Grammar.Node     node   ) { return Grammar.Node(Grammar.NodeValue(Grammar.Optional    ([node   ]))); } /// ditto
Grammar.Node seqChoice   (Grammar.Node[][] nodes  ) { return Grammar.Node(Grammar.NodeValue(Grammar.SeqChoice   ( nodes   ))); } /// ditto

// Fails an assertion naming the static type of the value that should not
// have been encountered; used as the catch-all arm of `match!` calls.
private void unexpected(T)(auto ref T v) { assert(false, "Unexpected " ~ T.stringof); }

--------------------------------------------------------------------------------
/generator/source/parser.d:
--------------------------------------------------------------------------------
module parser;

import std.algorithm.comparison;
import std.algorithm.iteration;
import std.algorithm.searching;
import std.array;
import std.conv : to;
import std.exception;
import std.string;

import ddoc;
import grammar;

/// Parses the grammar definitions found in one DDoc document and merges them
/// into `grammar`.
///
/// Params:
///   grammar  = receives the parsed definitions (`defs`) and the cross-references seen (`links`)
///   ddoc     = DDoc AST containing the definition lines
///   fileName = added to each definition's `definedIn` set; also the link file used for GLINK
///   macros   = macro definitions; a call to one of them is expanded and parsed recursively
///   kind     = kind assigned to every definition built here; `B` calls are only accepted for `Def.Kind.chars`
/// Returns: the names of definitions that were not yet present in `grammar.defs`.
/// Throws: Exception when a line or macro call is not understood, or when a
///   definition differs from an already-registered one of the same name.
string[] parse(ref Grammar grammar, const DDoc ddoc, string fileName, DDoc[string] macros, Grammar.Def.Kind kind)
{
	alias RegExp = Grammar.RegExp;
	alias LiteralChars = Grammar.LiteralChars;
	alias LiteralToken = Grammar.LiteralToken;
	alias Reference = Grammar.Reference;
	alias Optional = Grammar.Optional;
	alias Choice = Grammar.Choice;
	alias Seq = Grammar.Seq;

	alias NodeValue = Grammar.NodeValue;
	alias Node = Grammar.Node;
	alias Def = Grammar.Def;

	// Returns a copy of `ddoc` in which every MULTICOLS call is replaced by its
	// second argument; the contents of all other calls are processed recursively.
	static DDoc preprocess(const DDoc ddoc)
	{
		DDoc result;
		foreach (ref node; ddoc)
			if (node.type != .Node.Type.call)
				result ~= node;
			else
			if (node.isCallTo("MULTICOLS"))
				result ~= node.call.splitArguments()[1];
			else
			{
				.Node node2 = node;
				node2.call.contents = preprocess(node.call.contents);
				result ~= node2;
			}
		return result;
	}

	// State shared by all parseDefinition calls of one `parse` invocation.
	struct ParseContext
	{
		string currentName; // name of the definition whose body is being read (null before the first one)
		string file;        // copy of `fileName`
		DDoc[string] macros;
		Def.Kind kind;
	}

	// Converts the DDoc nodes of one body line into grammar nodes, in order.
	// Non-blank bare text and unknown macro calls are rejected.
	/*static*/ Node[] parseDefinition(const DDoc line, ref const ParseContext context)
	{
		scope(failure) { import std.stdio : stderr; stderr.writeln("Error with line: ", line); }
		Node[] seqNodes;
		foreach (ref node; line)
		{
			if (node.type == .Node.Type.text)
				// Only whitespace is allowed between macro calls.
				enforce(!node.text.strip.length, "Bare text node (%(%s%)) in grammar: %s".format([node.text], line));
			else
			if (node.isCallTo("I"))
			{
				// $(I ...) holds a prose description; each known phrase maps to a regular expression.
				auto text = node.getSingleTextChild();
				switch (text)
				{
					case "any Unicode character":
						seqNodes ~= regexp(`/[\s\S]/`);
						break;
					case "physical end of the file":
						seqNodes ~= regexp(`/$/m`); // illustrative
						break;
					case "Letter":
						seqNodes ~= regexp(`/[A-Za-z]/`);
						break;
					case "UniversalAlpha":
						// src/dmd/utf.d
						// Inclusive [first, last] code point ranges.
						static immutable wchar[2][] ALPHA_TABLE =
						[
							[0x00AA, 0x00AA], [0x00B5, 0x00B5], [0x00B7, 0x00B7], [0x00BA, 0x00BA],
							[0x00C0, 0x00D6], [0x00D8, 0x00F6], [0x00F8, 0x01F5], [0x01FA, 0x0217],
							[0x0250, 0x02A8], [0x02B0, 0x02B8], [0x02BB, 0x02BB], [0x02BD, 0x02C1],
							[0x02D0, 0x02D1], [0x02E0, 0x02E4], [0x037A, 0x037A], [0x0386, 0x0386],
							[0x0388, 0x038A], [0x038C, 0x038C], [0x038E, 0x03A1], [0x03A3, 0x03CE],
							[0x03D0, 0x03D6], [0x03DA, 0x03DA], [0x03DC, 0x03DC], [0x03DE, 0x03DE],
							[0x03E0, 0x03E0], [0x03E2, 0x03F3], [0x0401, 0x040C], [0x040E, 0x044F],
							[0x0451, 0x045C], [0x045E, 0x0481], [0x0490, 0x04C4], [0x04C7, 0x04C8],
							[0x04CB, 0x04CC], [0x04D0, 0x04EB], [0x04EE, 0x04F5], [0x04F8, 0x04F9],
							[0x0531, 0x0556], [0x0559, 0x0559], [0x0561, 0x0587], [0x05B0, 0x05B9],
							[0x05BB, 0x05BD], [0x05BF, 0x05BF], [0x05C1, 0x05C2], [0x05D0, 0x05EA],
							[0x05F0, 0x05F2], [0x0621, 0x063A], [0x0640, 0x0652], [0x0660, 0x0669],
							[0x0670, 0x06B7], [0x06BA, 0x06BE], [0x06C0, 0x06CE], [0x06D0, 0x06DC],
							[0x06E5, 0x06E8], [0x06EA, 0x06ED], [0x06F0, 0x06F9], [0x0901, 0x0903],
							[0x0905, 0x0939], [0x093D, 0x094D], [0x0950, 0x0952], [0x0958, 0x0963],
							[0x0966, 0x096F], [0x0981, 0x0983], [0x0985, 0x098C], [0x098F, 0x0990],
							[0x0993, 0x09A8], [0x09AA, 0x09B0], [0x09B2, 0x09B2], [0x09B6, 0x09B9],
							[0x09BE, 0x09C4], [0x09C7, 0x09C8], [0x09CB, 0x09CD], [0x09DC, 0x09DD],
							[0x09DF, 0x09E3], [0x09E6, 0x09F1], [0x0A02, 0x0A02], [0x0A05, 0x0A0A],
							[0x0A0F, 0x0A10], [0x0A13, 0x0A28], [0x0A2A, 0x0A30], [0x0A32, 0x0A33],
							[0x0A35, 0x0A36], [0x0A38, 0x0A39], [0x0A3E, 0x0A42], [0x0A47, 0x0A48],
							[0x0A4B, 0x0A4D], [0x0A59, 0x0A5C], [0x0A5E, 0x0A5E], [0x0A66, 0x0A6F],
							[0x0A74, 0x0A74], [0x0A81, 0x0A83], [0x0A85, 0x0A8B], [0x0A8D, 0x0A8D],
							[0x0A8F, 0x0A91], [0x0A93, 0x0AA8], [0x0AAA, 0x0AB0], [0x0AB2, 0x0AB3],
							[0x0AB5, 0x0AB9], [0x0ABD, 0x0AC5], [0x0AC7, 0x0AC9], [0x0ACB, 0x0ACD],
							[0x0AD0, 0x0AD0], [0x0AE0, 0x0AE0], [0x0AE6, 0x0AEF], [0x0B01, 0x0B03],
							[0x0B05, 0x0B0C], [0x0B0F, 0x0B10], [0x0B13, 0x0B28], [0x0B2A, 0x0B30],
							[0x0B32, 0x0B33], [0x0B36, 0x0B39], [0x0B3D, 0x0B43], [0x0B47, 0x0B48],
							[0x0B4B, 0x0B4D], [0x0B5C, 0x0B5D], [0x0B5F, 0x0B61], [0x0B66, 0x0B6F],
							[0x0B82, 0x0B83], [0x0B85, 0x0B8A], [0x0B8E, 0x0B90], [0x0B92, 0x0B95],
							[0x0B99, 0x0B9A], [0x0B9C, 0x0B9C], [0x0B9E, 0x0B9F], [0x0BA3, 0x0BA4],
							[0x0BA8, 0x0BAA], [0x0BAE, 0x0BB5], [0x0BB7, 0x0BB9], [0x0BBE, 0x0BC2],
							[0x0BC6, 0x0BC8], [0x0BCA, 0x0BCD], [0x0BE7, 0x0BEF], [0x0C01, 0x0C03],
							[0x0C05, 0x0C0C], [0x0C0E, 0x0C10], [0x0C12, 0x0C28], [0x0C2A, 0x0C33],
							[0x0C35, 0x0C39], [0x0C3E, 0x0C44], [0x0C46, 0x0C48], [0x0C4A, 0x0C4D],
							[0x0C60, 0x0C61], [0x0C66, 0x0C6F], [0x0C82, 0x0C83], [0x0C85, 0x0C8C],
							[0x0C8E, 0x0C90], [0x0C92, 0x0CA8], [0x0CAA, 0x0CB3], [0x0CB5, 0x0CB9],
							[0x0CBE, 0x0CC4], [0x0CC6, 0x0CC8], [0x0CCA, 0x0CCD], [0x0CDE, 0x0CDE],
							[0x0CE0, 0x0CE1], [0x0CE6, 0x0CEF], [0x0D02, 0x0D03], [0x0D05, 0x0D0C],
							[0x0D0E, 0x0D10], [0x0D12, 0x0D28], [0x0D2A, 0x0D39], [0x0D3E, 0x0D43],
							[0x0D46, 0x0D48], [0x0D4A, 0x0D4D], [0x0D60, 0x0D61], [0x0D66, 0x0D6F],
							[0x0E01, 0x0E3A], [0x0E40, 0x0E5B], [0x0E81, 0x0E82], [0x0E84, 0x0E84],
							[0x0E87, 0x0E88], [0x0E8A, 0x0E8A], [0x0E8D, 0x0E8D], [0x0E94, 0x0E97],
							[0x0E99, 0x0E9F], [0x0EA1, 0x0EA3], [0x0EA5, 0x0EA5], [0x0EA7, 0x0EA7],
							[0x0EAA, 0x0EAB], [0x0EAD, 0x0EAE], [0x0EB0, 0x0EB9], [0x0EBB, 0x0EBD],
							[0x0EC0, 0x0EC4], [0x0EC6, 0x0EC6], [0x0EC8, 0x0ECD], [0x0ED0, 0x0ED9],
							[0x0EDC, 0x0EDD], [0x0F00, 0x0F00], [0x0F18, 0x0F19], [0x0F20, 0x0F33],
							[0x0F35, 0x0F35], [0x0F37, 0x0F37], [0x0F39, 0x0F39], [0x0F3E, 0x0F47],
							[0x0F49, 0x0F69], [0x0F71, 0x0F84], [0x0F86, 0x0F8B], [0x0F90, 0x0F95],
							[0x0F97, 0x0F97], [0x0F99, 0x0FAD], [0x0FB1, 0x0FB7], [0x0FB9, 0x0FB9],
							[0x10A0, 0x10C5], [0x10D0, 0x10F6], [0x1E00, 0x1E9B], [0x1EA0, 0x1EF9],
							[0x1F00, 0x1F15], [0x1F18, 0x1F1D], [0x1F20, 0x1F45], [0x1F48, 0x1F4D],
							[0x1F50, 0x1F57], [0x1F59, 0x1F59], [0x1F5B, 0x1F5B], [0x1F5D, 0x1F5D],
							[0x1F5F, 0x1F7D], [0x1F80, 0x1FB4], [0x1FB6, 0x1FBC], [0x1FBE, 0x1FBE],
							[0x1FC2, 0x1FC4], [0x1FC6, 0x1FCC], [0x1FD0, 0x1FD3], [0x1FD6, 0x1FDB],
							[0x1FE0, 0x1FEC], [0x1FF2, 0x1FF4], [0x1FF6, 0x1FFC], [0x203F, 0x2040],
							[0x207F, 0x207F], [0x2102, 0x2102], [0x2107, 0x2107], [0x210A, 0x2113],
							[0x2115, 0x2115], [0x2118, 0x211D], [0x2124, 0x2124], [0x2126, 0x2126],
							[0x2128, 0x2128], [0x212A, 0x2131], [0x2133, 0x2138], [0x2160, 0x2182],
							[0x3005, 0x3007], [0x3021, 0x3029], [0x3041, 0x3093], [0x309B, 0x309C],
							[0x30A1, 0x30F6], [0x30FB, 0x30FC], [0x3105, 0x312C], [0x4E00, 0x9FA5],
							[0xAC00, 0xD7A3],
						];
						// Build one character class: single code points become \uXXXX,
						// ranges become \uXXXX-\uXXXX, concatenated without separators.
						seqNodes ~= regexp(`/[%-(%s%)]/`.format(ALPHA_TABLE.map!(r =>
							r[0] == r[1]
							? `\u%04x`.format(r[0])
							: `\u%04x-\u%04x`.format(r[0], r[1])
						)));
						break;
					default:
						throw new Exception("Unknown I: " ~ text);
				}
			}
			else
			if (node.isCallTo("B"))
			{
				// $(B ...) is literal characters; only valid for character-level definitions.
				auto text = node.call.contents.toString();
				enforce(context.kind == Def.Kind.chars, `B in GRAMMAR block: ` ~ text);
				// A six-character \uXXXX escape is decoded to the character it denotes.
				if (text.length == 6 && text.startsWith(`\u`))
					seqNodes ~= literalChars(wchar(text[2 .. $].to!ushort(16)).to!string);
				else
				{
					// These are to aid fixing usage of $(D ...)/$(B ...) in the spec
					enforce(text.among(
						"0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
						"a", "b", "c", "d", "e", "f",
						"A", "B", "C", "D", "E", "F",
						"/*",
						"*/",
						"//",
						"/+",
						"+/",
						`r"`,
						`"`,
						"`",
						"'",
						"c",
						"w",
						"d",
						`q"`,
						`q"(`, `)"`,
						`q"[`, `]"`,
						`q"{`, `}"`,
						`q"<`, `>"`,
						`(`, `[`, `<`, `{`,
						`)`, `]`, `>`, `}`,
						"L", "u", "U",
						"Lu", "LU",
						"uL", "UL",
						"0b",
						"0B",
						"_",
						".",
						`\'`,
						`\"`,
						`\?`,
						`\`,
						`\0`,
						`\a`,
						`\b`,
						`\f`,
						`\n`,
						`\r`,
						`\t`,
						`\v`,
						`\x`,
						`\\`,
						`\u`,
						`\U`,
						`x"`,
						`e+`,
						`E+`,
						`e-`,
						`E-`,
						`0x`,
						`0X`,
						`p`,
						`P`,
						`p+`,
						`P+`,
						`p-`,
						`P-`,
						`i`,
						`&`,
						`;`,
						`#!`,
					), "Unknown B: " ~ text);
					seqNodes ~= literalChars(text);
				}
			}
			else
			if (node.isCallTo("D"))
			{
				// ditto
				// $(D ...) holds whitespace-separated tokens; each word must match
				// one of the accepted shapes below and becomes its own literal token.
				auto text = node.call.contents.toString();
				enforce(text.length);
				foreach (word; text.split)
				{
					enforce(
						// keywords
						(word.length >= 2 && word.representation.all!(c => "abcdefghijklmnopqrstuvwxyz_".representation.canFind(c))) ||
						// traits
						(["is", "has", "get"].any!(prefix => word.startsWith(prefix)) && word.representation.all!(c => "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".representation.canFind(c))) ||
						// magic keywords
						(word.startsWith("__") && word.endsWith("__") && word[2 .. $-2].representation.all!(c => "ABCDEFGHIJKLMNOPQRSTUVWXYZ_".representation.canFind(c))) ||
						// registers
						(word.length >= 2 && "ABCDEFGHIJKLMNOPQRSTUVWXYZ".representation.canFind(word[0]) && word.representation.all!(c => "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789()".representation.canFind(c))) ||
						// other tokens
						word.among(
							"/",
							"/=",
							".",
							"..",
							"...",
							"&",
							"&=",
							"&&",
							"|",
							"|=",
							"||",
							"-",
							"-=",
							"--",
							"+",
							"+=",
							"++",
							"<",
							"<=",
							"<<",
							"<<=",
							">",
							">=",
							">>=",
							">>>=",
							">>",
							">>>",
							"!",
							"!=",
							"(",
							")",
							"[",
							"]",
							"{",
							"}",
							"?",
							",",
							";",
							":",
							"$",
							"=",
							"==",
							"*",
							"*=",
							"%",
							"%=",
							"^",
							"^=",
							"^^",
							"^^=",
							"~",
							"~=",
							"@",
							"=>",
							"#",

							`q{`,

							"C",
							"C++",
							"D",
							"Windows",
							"System",
							"Objective-C",

							"classInstanceSize", // should have been getClassInstanceSize
							"allMembers", // should have been getAllMembers
							"derivedMembers", // should have been getDerivedMembers
							"toType",

							"__LOCAL_SIZE",
						), "Unknown D: " ~ word);
					seqNodes ~= literalToken(word);
				}
			}
			else
			if (node.isCallTo("GLINK") || node.isCallTo("GLINK_LEX"))
			{
				// Reference to another rule; a self-reference must use GSELF instead.
				auto text = node.getSingleTextChild();
				enforce(text != context.currentName, "GLINK to %(%s%) should be GSELF".format([text]));
				seqNodes ~= reference(text);
				// GLINK_LEX always points into "lex"; GLINK points into the current file.
				auto file = node.call.macroName == "GLINK_LEX" ? "lex" : context.file;
				grammar.links.add([file, text].staticArray);
			}
			else
			if (node.isCallTo("GLINK2"))
			{
				// Reference to a rule in another file: arguments are (file, name).
				auto arguments = node.call.splitArguments();
				enforce(arguments.length == 2);
				auto file = arguments[0].toString();
				enforce(file != context.file, "GLINK2 to the current file should be GLINK");
				auto text = arguments[1].toString();
				enforce(text != context.currentName, "GLINK to %(%s%) should be GSELF".format([text]));
				seqNodes ~= reference(text);
				grammar.links.add([file, text].staticArray);
			}
			else
			if (node.isCallTo("LINK2") || node.isCallTo("RELATIVE_LINK2"))
			{
				// Hyperlink: the first argument is ignored, the second is parsed as grammar.
				auto arguments = node.call.splitArguments();
				enforce(arguments.length == 2);
				seqNodes ~= parseDefinition(arguments[1], context);
			}
			else
			if (node.isCallTo("GSELF"))
			{
				// Self-reference; must name the definition currently being parsed.
				auto text = node.getSingleTextChild();
				enforce(text == context.currentName, "GSELF to %(%s%) should be GLINK or to %(%s%)".format([text], [context.currentName]));
				seqNodes ~= reference(text);
			}
			else
			if (node.isCallTo("OPT"))
			{
				// OPT is a postfix marker: it makes the previously parsed node optional.
				enforce(seqNodes.length);
				seqNodes[$-1] = optional(seqNodes[$-1]);
			}
			else
			if (node.isCallTo("GDEPRECATED"))
				// Wrapper is dropped; its contents are parsed as ordinary grammar.
				seqNodes ~= parseDefinition(node.call.contents, context);
			else
			if (node.isCallTo("GRESERVED"))
				// Same treatment as GDEPRECATED.
				seqNodes ~= parseDefinition(node.call.contents, context);
			else
			if (node.isCallToEmpty("CODE_AMP"))
				seqNodes ~= literalToken("&");
			else
			if (node.isCallToEmpty("CODE_LCURL"))
				seqNodes ~= literalToken("{");
			else
			if (node.isCallToEmpty("CODE_RCURL"))
				seqNodes ~= literalToken("}");
			else
			if (node.isCallToEmpty("CODE_PERCENT"))
				seqNodes ~= literalToken("%");
			else
			if (auto pdefinition = node.call.macroName in context.macros)
				// Macro supplied by the caller: expand it and parse the expansion.
				seqNodes ~= parseDefinition(node.call.expand(*pdefinition), context);
			else
				throw new Exception("Unknown macro call (%(%s%)) in grammar".format([node.call.macroName]));
		}
		return seqNodes;
	}

	/// Parse and accumulate definitions from DDoc AST
	{
		ParseContext context;
		context.file = fileName;
		context.macros = macros;
		context.kind = kind;

		Node[] currentDefs; // one seq node per body line of the definition being read
		string[] newDefs;   // names first registered during this call

		// Registers the definition collected so far (if any) and resets the
		// per-definition state. The body lines become the alternatives of a choice.
		void flush()
		{
			if (!context.currentName)
				return;

			auto newDef = Def(choice(currentDefs), kind);
			grammar.defs.update(context.currentName,
				// Name not seen before: record it as new and store the definition.
				{ newDefs ~= context.currentName; return newDef; },
				// Name already present: node and kind must match exactly.
				(ref Def def)
				{
					enforce(Def(def.node, def.kind) == newDef,
						"Definition mismatch for " ~ context.currentName);
				}
			);

			auto pDef = &grammar.defs[context.currentName];
			pDef.definedIn.add(fileName);

			context.currentName = null;
			currentDefs = null;
		}

		foreach (line; preprocess(ddoc).split('\n'))
		{
			if (!line.length || (line.length == 1 && line[0].isText("")))
				{} // Empty line
			else
			if (line.length == 2 && line[0].isCallTo("GNAME") && line[1].isText(":"))
			{
				// Definition
				// A GNAME header finishes the previous definition and starts a new one.
				flush();
				context.currentName = line[0].getSingleTextChild();
			}
			else
			if (line.length >= 2 && line[0].isText(" "))
			{
				// Possible declaration
				enforce(context.currentName, "Body line without definition line");
				currentDefs ~= seq(parseDefinition(line, context));
			}
			else
				throw new Exception(format!"Can't parse grammar from: %s"(line));
		}
		// Register the last definition of the document.
		flush();

		return newDefs;
	}
}

-------------------------------------------------------------------------------- /generator/source/writer.d: --------------------------------------------------------------------------------
module writer;

import std.algorithm.iteration;
import std.array;
import std.stdio;
import std.string;
import std.sumtype;

import ae.utils.aa;
import ae.utils.text : splitByCamelCase;

import grammar;

/// Writes a `Grammar` to a file as a tree-sitter `grammar({...})` JavaScript module.
///
/// Usage, as far as this file shows: construct, call `startFile` /
/// `startSection` / `writeRule` as needed, then `close`.
struct Writer
{
	File f;
	Grammar grammar;

	/// Opens `fileName` for writing and emits the module prologue: the grammar
	/// name, the `word` rule, the `extras` list (one converted rule name per
	/// entry of `extras`) and the opening of the `rules` object.
	this(string fileName, Grammar grammar, const string[] extras)
	{
		this.grammar = grammar;

		f.open(fileName, "wb");

		f.writef(q"EOF
module.exports = grammar({
name: 'd',

word: $ => $.identifier,

extras: $ => [
%-( $.%s,
%|%) ],

rules: {
EOF", extras.map!(extra => convertRuleName(extra)));
	}

	string currentFile;        // spec page name used in emitted URLs
	bool fileHeaderPending;    // a file banner must precede the next rule written
	bool sectionHeaderPending; // a section separator must precede the next rule written

	/// Makes `file` the current spec page. The banner is emitted lazily by the
	/// next `writeRule` that actually writes something.
	void startFile(string file)
	{
		currentFile = file;
		fileHeaderPending = true;
		sectionHeaderPending = true;
	}

	/// Requests a section separator before the next rule, unless a file banner
	/// is already pending (the banner takes its place).
	void startSection()
	{
		if (!fileHeaderPending)
			sectionHeaderPending = true;
	}

	/// Writes the rule for `defName`, preceded by any pending banner or
	/// separator, then writes each rule listed in the definition's `tail`.
	/// Definitions not marked `used` are skipped entirely, tail included.
	void writeRule(string defName)
	{
		scope(failure) { import std.stdio : stderr; stderr.writeln("Error while writing rule ", defName); }

		auto def = &grammar.defs[defName];
		if (!def.used)
			return;

		if (fileHeaderPending)
		{
			f.writef(q"EOF

// ------------------------------------------------------------------------
// https://dlang.org/spec/%s.html
// ------------------------------------------------------------------------
EOF", currentFile);
			fileHeaderPending = false;
			// The banner doubles as the separator for the first section.
			sectionHeaderPending = false;
		}

		if (sectionHeaderPending)
		{
			f.write(q"EOF

// ---
EOF");
			sectionHeaderPending = false;
		}

		f.writeln();
		// Synthetic definitions get no spec link comment.
		if (!def.synthetic)
			f.writefln(" // https://dlang.org/spec/%s.html#%s",
				currentFile,
				defName,
			);

		f.writefln(" %s: $ =>",
			convertRuleName(defName));
		writeRuleBody(defName);

		foreach (tail; def.tail)
			writeRule(tail);
	}

	/// Writes the closing of the `rules` object and of the `grammar({...})` call.
	/// NOTE(review): this does not call `f.close()`; presumably the file is
	/// flushed and closed when the `File` handle is destroyed - confirm.
	void close()
	{
		f.write(q"EOF
}
});
EOF");
	}

private:
	/// Returns the JavaScript rule name for definition `name`: the definition's
	/// `publicName` if set (otherwise `name`), split by camel case, lower-cased
	/// and joined with underscores, with a leading "_" for hidden definitions.
	string convertRuleName(string name)
	{
		string publicName = name;
		if (auto defPublicName = grammar.defs[name].publicName)
			publicName = defPublicName;
		return (grammar.defs[name].hidden ? "_" : "") ~ publicName.splitByCamelCase.map!toLower.join("_");
	}

	/// Writes the right-hand side of the rule for `defName`.
	void writeRuleBody(string defName)
	{
		int indent = 6; // current indentation, in spaces

		// Writes `s` on its own line at the current indentation.
		void line(string s) { f.writeln(" ".replicate(indent), s); }
		// Writes `s` as one list element (with trailing comma).
		void single(string s) { line(s ~ ","); }

		// Writes `fun(...)` with one child per line, indented two further spaces;
		// an empty child list is written as `fun(),` on a single line.
		void list(T)(string fun, T[] children, void delegate(ref T) childWriter)
		{
			if (!children.length)
			{
				line(fun ~ "(),");
				return;
			}
			line(fun ~ "(");
			indent += 2;
			foreach (ref child; children)
				childWriter(child);
			indent -= 2;
			line("),");
		}

		// Names of definitions currently being expanded by writeDef.
		HashSet!string visiting;

		// Writes the node tree of definition `defName`. A definition that is
		// already being expanded is replaced by a "/* recursion */" marker.
		void writeDef(ref string defName)
		{
			if (defName in visiting)
				return single("/* recursion */");
			visiting.add(defName);
			scope(success) visiting.remove(defName);

			auto def = &grammar.defs[defName];
			// Character-level definitions are labelled with a comment naming them.
			if (def.kind == Grammar.Def.Kind.chars)
				line("// " ~ defName);

			void writeNode(ref Grammar.Node node)
			{
				node.match!(
					(ref Grammar.RegExp v) => single(v.regexp),
					(ref Grammar.LiteralChars v) => single(format!"%(%s%)"([v.chars])),
					(ref Grammar.LiteralToken v) => single(format!"%(%s%)"([v.literal])),
					// https://issues.dlang.org/show_bug.cgi?id=22016
					// Inside a character-level definition a reference is expanded in
					// place; otherwise it is written as `$.rule_name`.
					(ref Grammar.Reference v) { if (def.kind == Grammar.Def.Kind.chars) writeDef(v.name); else single("$." ~ convertRuleName(v.name)); },
					(ref Grammar.Choice v) => list("choice" , v.nodes, &writeNode),
					(ref Grammar.Seq v) => list("seq" , v.nodes, &writeNode),
					(ref Grammar.Repeat v) => list("repeat" , v.nodes, &writeNode),
					(ref Grammar.Repeat1 v) => list("repeat1" , v.nodes, &writeNode),
					(ref Grammar.Optional v) => list("optional", v.nodes, &writeNode),
					// SeqChoice must not reach the writer.
					(ref Grammar.SeqChoice v) { assert(false); },
				);
			}
			writeNode(def.node);
		}

		auto def = &grammar.defs[defName];
		final switch (def.kind)
		{
			case Grammar.Def.Kind.chars:
				// Character-level rules are wrapped in token(...).
				list("token", [defName], &writeDef);
				break;
			case Grammar.Def.Kind.tokens:
				writeDef(defName);
				break;
		}
	}
}

// "
-------------------------------------------------------------------------------- /package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tree-sitter-d", 3 | "version": "0.0.1", 4 | "lockfileVersion": 1, 5 | "requires": true, 6 | "dependencies": { 7 | "nan": { 8 | "version": "2.14.2", 9 | "resolved": "https://registry.npmjs.org/nan/-/nan-2.14.2.tgz", 10 | "integrity": "sha512-M2ufzIiINKCuDfBSAUr1vWQ+vuVcA9kqx8JJUsbQi6yf1uGRyb7HfpdfUr5qLXf3B/t8dPvcjhKMmlfnP47EzQ==" 11 | }, 12 | "tree-sitter-cli": { 13 | "version": "0.20.0", 14 | "resolved": "https://registry.npmjs.org/tree-sitter-cli/-/tree-sitter-cli-0.20.0.tgz", 15 | "integrity": "sha512-4D1qapWbJXZ5rrSUGM5rcw5Vuq/smzn9KbiFRhlON6KeuuXjra+KAtDYVrDgAoLIG4ku+jbEEGrJxCptUGi3dg==", 16 | "dev": true 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tree-sitter-d", 3 | "version": "0.0.1", 4 | "description": "D grammar for tree-sitter", 5 | "main": "bindings/node", 6 | "keywords": [ 7 | "parser", 8 | "lexer" 9 | ], 10 | "author":
"Vladimir Panteleev and the D Language Foundation", 11 | "license": "BSL-1.0", 12 | "dependencies": { 13 | "nan": "^2.14.2" 14 | }, 15 | "devDependencies": { 16 | "tree-sitter-cli": "^0.20.0" 17 | }, 18 | "scripts": { 19 | "generate": "tree-sitter generate", 20 | "test": "tree-sitter test" 21 | }, 22 | "repository": { 23 | "type": "git", 24 | "url": "git+https://github.com/CyberShadow/tree-sitter-d.git" 25 | }, 26 | "bugs": { 27 | "url": "https://github.com/CyberShadow/tree-sitter-d/issues" 28 | }, 29 | "homepage": "https://github.com/CyberShadow/tree-sitter-d#readme", 30 | "tree-sitter": [ 31 | { 32 | "scope": "source.d", 33 | "file-types": [ 34 | "d", 35 | "di" 36 | ] 37 | } 38 | ] 39 | } 40 | -------------------------------------------------------------------------------- /src/scanner.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | enum TokenType { 5 | NESTING_BLOCK_COMMENT, 6 | DELIMITED_STRING, 7 | }; 8 | 9 | // This is only an approximation of the exact definition. 
10 | static bool is_identifier_char(int32_t c) { 11 | return 12 | (c >= 'a' && c <= 'z') || 13 | (c >= 'A' && c <= 'Z') || 14 | (c >= '0' && c <= '9') || 15 | c == '_'; 16 | } 17 | 18 | extern "C" { 19 | 20 | void *tree_sitter_d_external_scanner_create() { 21 | return NULL; 22 | } 23 | 24 | bool tree_sitter_d_external_scanner_scan(void *payload, TSLexer *lexer, 25 | const bool *valid_symbols) { 26 | if (lexer->lookahead == '/' && valid_symbols[NESTING_BLOCK_COMMENT]) { 27 | lexer->advance(lexer, false); 28 | if (lexer->lookahead != '+') { 29 | return false; 30 | } 31 | lexer->advance(lexer, false); 32 | 33 | size_t depth = 1; 34 | int32_t last = 0; 35 | while (depth > 0) { 36 | last = lexer->lookahead; 37 | lexer->advance(lexer, false); 38 | if (last == '/' && lexer->lookahead == '+') { 39 | depth++; 40 | last = 0; 41 | lexer->advance(lexer, false); 42 | } else if (last == '+' && lexer->lookahead == '/') { 43 | depth--; 44 | last = 0; 45 | lexer->advance(lexer, false); 46 | } else if (lexer->lookahead == 0) { 47 | return false; // EOF 48 | } 49 | } 50 | lexer->result_symbol = NESTING_BLOCK_COMMENT; 51 | return true; 52 | } 53 | 54 | if (lexer->lookahead == 'q' && valid_symbols[DELIMITED_STRING]) { 55 | lexer->advance(lexer, false); 56 | if (lexer->lookahead != '"') { 57 | return false; 58 | } 59 | lexer->advance(lexer, false); 60 | lexer->result_symbol = DELIMITED_STRING; 61 | 62 | int32_t opener = lexer->lookahead, closer; 63 | switch (opener) { 64 | case '(': closer = ')'; break; 65 | case '[': closer = ']'; break; 66 | case '{': closer = '}'; break; 67 | case '<': closer = '>'; break; 68 | default: 69 | { 70 | // Handle the identifier case 71 | std::vector delimiter; 72 | delimiter.push_back('\n'); 73 | while (lexer->lookahead != '\n') { 74 | if (!is_identifier_char(lexer->lookahead)) 75 | return false; // bad syntax or EOF 76 | delimiter.push_back(lexer->lookahead); 77 | lexer->advance(lexer, false); 78 | } 79 | delimiter.push_back('"'); 80 | 81 | size_t 
delimiter_pos = 0; 82 | while (true) { 83 | if (lexer->lookahead == 0) 84 | return false; // EOF 85 | if (delimiter_pos == delimiter.size()) 86 | return true; 87 | if (lexer->lookahead == delimiter.at(delimiter_pos)) 88 | delimiter_pos++; 89 | else 90 | delimiter_pos = lexer->lookahead == delimiter.at(0) ? 1 : 0; 91 | lexer->advance(lexer, false); 92 | } 93 | } 94 | } 95 | 96 | // Handle the punctuation case 97 | size_t depth = 1; 98 | while (depth > 0) { 99 | lexer->advance(lexer, false); 100 | if (lexer->lookahead == opener) { 101 | depth++; 102 | } else if (lexer->lookahead == closer) { 103 | depth--; 104 | } else if (lexer->lookahead == 0) { 105 | return false; // EOF 106 | } 107 | } 108 | lexer->advance(lexer, false); // last closer 109 | if (lexer->lookahead != '"') 110 | return false; 111 | lexer->advance(lexer, false); // " 112 | return true; 113 | } 114 | 115 | return false; 116 | } 117 | 118 | unsigned tree_sitter_d_external_scanner_serialize(void *payload, char *buffer) { 119 | return 0; 120 | } 121 | 122 | void tree_sitter_d_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) { 123 | } 124 | 125 | void tree_sitter_d_external_scanner_destroy(void *payload) { 126 | } 127 | 128 | } 129 | -------------------------------------------------------------------------------- /test/corpus/2_lex-13_floatliteral.txt: -------------------------------------------------------------------------------- 1 | ==================== 2 | FloatLiteral + UFCS 3 | ==================== 4 | 5 | float a = 1.0; 6 | float a = 1.foo; 7 | float a = 1. 
.foo; 8 | 9 | --- 10 | 11 | (source_file 12 | (module 13 | (decl_defs 14 | 15 | (var_declarations 16 | (fundamental_type) 17 | (declarators 18 | (declarator_initializer 19 | (var_declarator 20 | (identifier)) 21 | (exp_initializer 22 | (primary_expression 23 | (float_literal)))))) 24 | 25 | (var_declarations 26 | (fundamental_type) 27 | (declarators 28 | (declarator_initializer 29 | (var_declarator 30 | (identifier)) 31 | (exp_initializer 32 | (postfix_expression 33 | (primary_expression 34 | (integer_literal)) 35 | (identifier)))))) 36 | 37 | (var_declarations 38 | (fundamental_type) 39 | (declarators 40 | (declarator_initializer 41 | (var_declarator 42 | (identifier)) 43 | (exp_initializer 44 | (postfix_expression 45 | (primary_expression 46 | (float_literal)) 47 | (identifier)))))) 48 | ))) 49 | -------------------------------------------------------------------------------- /test/corpus/2_lex-16_special_token_sequence.txt: -------------------------------------------------------------------------------- 1 | ==================== 2 | SpecialTokenSequence 3 | ==================== 4 | 5 | #line 1 6 | # line 2 7 | # line 3 "file.d" 8 | 9 | --- 10 | 11 | (source_file 12 | (special_token_sequence (integer_literal)) 13 | (special_token_sequence (integer_literal)) 14 | (special_token_sequence (integer_literal) (filespec))) 15 | -------------------------------------------------------------------------------- /test/corpus/2_lex-1_source_text.txt: -------------------------------------------------------------------------------- 1 | ==================== 2 | the empty file 3 | ==================== 4 | 5 | --- 6 | 7 | (source_file) 8 | 9 | ==================== 10 | Shebang 11 | ==================== 12 | 13 | #!/usr/bin/dmd -run 14 | 15 | --- 16 | 17 | (source_file 18 | (shebang)) 19 | -------------------------------------------------------------------------------- /test/corpus/2_lex-6_comment.txt: -------------------------------------------------------------------------------- 
1 | ==================== 2 | LineComment 3 | ==================== 4 | 5 | // this is a comment 6 | 7 | --- 8 | 9 | (source_file 10 | (line_comment)) 11 | 12 | ==================== 13 | BlockComment 14 | ==================== 15 | 16 | /* this is a comment */ 17 | 18 | /* multi 19 | line 20 | comment */ 21 | 22 | /* no nesting /* */ 23 | 24 | /* no interior line comments // */ 25 | 26 | --- 27 | 28 | (source_file 29 | (block_comment) 30 | (block_comment) 31 | (block_comment) 32 | (block_comment)) 33 | 34 | ==================== 35 | BlockComment 2 36 | ==================== 37 | 38 | int i; 39 | 40 | /***************************************************/ 41 | 42 | --- 43 | 44 | (source_file 45 | (module 46 | (decl_defs 47 | (var_declarations 48 | (fundamental_type) 49 | (declarators 50 | (var_declarator 51 | (identifier)))))) 52 | (block_comment)) 53 | 54 | ==================== 55 | NestingBlockComment 56 | ==================== 57 | 58 | /+ this is a comment +/ 59 | 60 | /+ /+ nesting! +/ +/ 61 | 62 | /+ /* +/ 63 | /+ */ +/ 64 | /+ // +/ 65 | 66 | /+ /+/ +/ +/ 67 | /+ /+ +/+ +/ 68 | 69 | int/+ +/a; 70 | 71 | --- 72 | 73 | (source_file 74 | (nesting_block_comment) 75 | (nesting_block_comment) 76 | (nesting_block_comment) 77 | (nesting_block_comment) 78 | (nesting_block_comment) 79 | (nesting_block_comment) 80 | (nesting_block_comment) 81 | 82 | (module 83 | (decl_defs 84 | (var_declarations 85 | (fundamental_type) 86 | (nesting_block_comment) 87 | (declarators 88 | (var_declarator 89 | (identifier))))))) 90 | -------------------------------------------------------------------------------- /test/corpus/2_lex-9_string_literals.txt: -------------------------------------------------------------------------------- 1 | ==================== 2 | WysiwygString 3 | ==================== 4 | 5 | x!r"Hello, world!" 
y; 6 | x!r"`\" y; 7 | x!r" 8 | " y; 9 | 10 | --- 11 | 12 | (source_file 13 | (module 14 | (decl_defs 15 | 16 | (var_declarations 17 | (qualified_identifier 18 | (template_instance 19 | (identifier) 20 | (template_arguments 21 | (template_single_argument 22 | (wysiwyg_string))))) 23 | (declarators 24 | (var_declarator 25 | (identifier)))) 26 | 27 | (var_declarations 28 | (qualified_identifier 29 | (template_instance 30 | (identifier) 31 | (template_arguments 32 | (template_single_argument 33 | (wysiwyg_string))))) 34 | (declarators 35 | (var_declarator 36 | (identifier)))) 37 | 38 | (var_declarations 39 | (qualified_identifier 40 | (template_instance 41 | (identifier) 42 | (template_arguments 43 | (template_single_argument 44 | (wysiwyg_string))))) 45 | (declarators 46 | (var_declarator 47 | (identifier))))))) 48 | 49 | ==================== 50 | AlternateWysiwygString 51 | ==================== 52 | 53 | x!`Hello, world!` y; 54 | x!`\` y; 55 | x!` 56 | ` y; 57 | 58 | --- 59 | 60 | (source_file 61 | (module 62 | (decl_defs 63 | 64 | (var_declarations 65 | (qualified_identifier 66 | (template_instance 67 | (identifier) 68 | (template_arguments 69 | (template_single_argument 70 | (alternate_wysiwyg_string))))) 71 | (declarators 72 | (var_declarator 73 | (identifier)))) 74 | 75 | (var_declarations 76 | (qualified_identifier 77 | (template_instance 78 | (identifier) 79 | (template_arguments 80 | (template_single_argument 81 | (alternate_wysiwyg_string))))) 82 | (declarators 83 | (var_declarator 84 | (identifier)))) 85 | 86 | (var_declarations 87 | (qualified_identifier 88 | (template_instance 89 | (identifier) 90 | (template_arguments 91 | (template_single_argument 92 | (alternate_wysiwyg_string))))) 93 | (declarators 94 | (var_declarator 95 | (identifier))))))) 96 | 97 | ==================== 98 | DoubleQuotedString 99 | ==================== 100 | 101 | x!"Hello, world!" y; 102 | x!"\"" y; 103 | x!" 
104 | " y; 105 | 106 | --- 107 | 108 | (source_file 109 | (module 110 | (decl_defs 111 | 112 | (var_declarations 113 | (qualified_identifier 114 | (template_instance 115 | (identifier) 116 | (template_arguments 117 | (template_single_argument 118 | (double_quoted_string))))) 119 | (declarators 120 | (var_declarator 121 | (identifier)))) 122 | 123 | (var_declarations 124 | (qualified_identifier 125 | (template_instance 126 | (identifier) 127 | (template_arguments 128 | (template_single_argument 129 | (double_quoted_string))))) 130 | (declarators 131 | (var_declarator 132 | (identifier)))) 133 | 134 | (var_declarations 135 | (qualified_identifier 136 | (template_instance 137 | (identifier) 138 | (template_arguments 139 | (template_single_argument 140 | (double_quoted_string))))) 141 | (declarators 142 | (var_declarator 143 | (identifier))))))) 144 | 145 | ==================== 146 | DelimitedString 147 | ==================== 148 | 149 | x!q"EOF 150 | Hello, world! 151 | Not the end: EOF" 152 | EOF: also not the end 153 | Also not the end: 154 | EOF 155 | The real end: 156 | EOF" y; 157 | 158 | x!q"( ( [ ) < { )" y; 159 | x!q"[ [ ( ] < { ]" y; 160 | x!q"{ { [ } < ) }" y; 161 | x!q"< < ( > [ { >" y; 162 | 163 | --- 164 | 165 | (source_file 166 | (module 167 | (decl_defs 168 | 169 | (var_declarations 170 | (qualified_identifier 171 | (template_instance 172 | (identifier) 173 | (template_arguments 174 | (template_single_argument 175 | (delimited_string))))) 176 | (declarators 177 | (var_declarator 178 | (identifier)))) 179 | 180 | (var_declarations 181 | (qualified_identifier 182 | (template_instance 183 | (identifier) 184 | (template_arguments 185 | (template_single_argument 186 | (delimited_string))))) 187 | (declarators 188 | (var_declarator 189 | (identifier)))) 190 | 191 | (var_declarations 192 | (qualified_identifier 193 | (template_instance 194 | (identifier) 195 | (template_arguments 196 | (template_single_argument 197 | (delimited_string))))) 198 | (declarators 
199 | (var_declarator 200 | (identifier)))) 201 | 202 | (var_declarations 203 | (qualified_identifier 204 | (template_instance 205 | (identifier) 206 | (template_arguments 207 | (template_single_argument 208 | (delimited_string))))) 209 | (declarators 210 | (var_declarator 211 | (identifier)))) 212 | 213 | (var_declarations 214 | (qualified_identifier 215 | (template_instance 216 | (identifier) 217 | (template_arguments 218 | (template_single_argument 219 | (delimited_string))))) 220 | (declarators 221 | (var_declarator 222 | (identifier))))))) 223 | 224 | -------------------------------------------------------------------------------- /test/corpus/30_iasm-11_gcc.txt: -------------------------------------------------------------------------------- 1 | ==================== 2 | GccExtAsmInstruction 3 | ==================== 4 | 5 | void cpuid() 6 | { 7 | uint u; 8 | asm { "cpuid" : "=eax" (u) : "eax" (0x8000_0000) : "ebx", "ecx", "edx"; } 9 | } 10 | 11 | --- 12 | 13 | (source_file 14 | (module 15 | (decl_defs 16 | (func_declaration 17 | (fundamental_type) 18 | (func_declarator 19 | (identifier) 20 | (func_declarator_suffix 21 | (parameters))) 22 | 23 | (specified_function_body 24 | (block_statement 25 | (statement_list 26 | (declaration_statement 27 | (var_declarations 28 | (fundamental_type) 29 | (declarators 30 | (var_declarator 31 | (identifier))))) 32 | 33 | (gcc_asm_statement 34 | (gcc_asm_instruction_list 35 | (gcc_ext_asm_instruction 36 | (primary_expression 37 | (string_literals 38 | (double_quoted_string))) 39 | 40 | (gcc_asm_operands 41 | (double_quoted_string) 42 | (primary_expression 43 | (identifier))) 44 | 45 | (gcc_asm_operands 46 | (double_quoted_string) 47 | (primary_expression 48 | (integer_literal))) 49 | 50 | (gcc_asm_clobbers 51 | (double_quoted_string) 52 | (gcc_asm_clobbers 53 | (double_quoted_string) 54 | (double_quoted_string))))))))))))) 55 | -------------------------------------------------------------------------------- 
/test/parse-success-xfail.txt: -------------------------------------------------------------------------------- 1 | # TODO 2 | test/parse-success/dmd/runnable/complex.d 3 | test/parse-success/dmd/runnable/helloUTF16BE.d 4 | test/parse-success/dmd/runnable/helloUTF16.d 5 | test/parse-success/dmd/runnable/sdtor.d 6 | test/parse-success/dmd/runnable/test15.d 7 | test/parse-success/dmd/runnable/testcontracts.d 8 | test/parse-success/dmd/runnable/testUTF32.d 9 | test/parse-success/dmd/runnable/uda.d 10 | -------------------------------------------------------------------------------- /test/parse-success/dmd/compilable: -------------------------------------------------------------------------------- 1 | ../../repos/dmd/test/compilable -------------------------------------------------------------------------------- /test/parse-success/dmd/runnable: -------------------------------------------------------------------------------- 1 | ../../repos/dmd/test/runnable -------------------------------------------------------------------------------- /test/parse-success/dmd_asm.d: -------------------------------------------------------------------------------- 1 | void fun() 2 | { 3 | asm 4 | { 5 | int 80; 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /test/repos/README.md: -------------------------------------------------------------------------------- 1 | D projects which we use for testing the parser are referenced as submodules here. 2 | -------------------------------------------------------------------------------- /test/tmp/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !/.gitignore 3 | --------------------------------------------------------------------------------