├── .gitignore ├── Makefile ├── README.md ├── bootstrap.d ├── cli.d ├── ddlib ├── lexan.d └── templates.d ├── ddpg.d ├── dunnart.ddgs ├── errors.d ├── examples ├── calc │ ├── Makefile │ ├── calc.d │ ├── parser.ddgs │ └── test_data ├── calc2 │ ├── Makefile │ ├── calc.d │ ├── parser.ddgs │ └── test_data ├── evaluater │ ├── Makefile │ ├── evaluator.d │ ├── parser.ddgs │ └── test_input └── test_inject │ ├── Makefile │ ├── calc.d │ ├── expr.ddgsi │ ├── fields.ddgsi │ ├── header.ddgsi │ ├── line.ddgsi │ ├── parser.ddgs │ ├── precs.ddgsi │ ├── skips.ddgsi │ ├── test_data │ └── tokens.ddgsi ├── grammar.d ├── idnumber.d ├── sets.d ├── symbols.d └── workarounds.d /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # D 4 | *.o 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Backup files 10 | *~ 11 | 12 | # Packages 13 | *.egg 14 | *.egg-info 15 | dist 16 | build 17 | eggs 18 | parts 19 | bin 20 | var 21 | sdist 22 | develop-eggs 23 | .installed.cfg 24 | lib 25 | lib64 26 | 27 | # Installer logs 28 | pip-log.txt 29 | 30 | # Unit test / coverage reports 31 | .coverage 32 | .tox 33 | nosetests.xml 34 | 35 | # Translations 36 | *.mo 37 | 38 | # Mr Developer 39 | .mr.developer.cfg 40 | .project 41 | .pydevproject 42 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | MSRCS = grammar.d sets.d symbols.d ddlib/lexan.d ddlib/templates.d cli.d errors.d workarounds.d 2 | 3 | ddpg: ddpg.d dunnart.d $(MSRCS) Makefile 4 | dmd ddpg.d dunnart.d $(MSRCS) 5 | 6 | dunnart.d: ddpg_bootstrap dunnart.ddgs 7 | ./ddpg_bootstrap -f --states=dunnart.states dunnart.ddgs 8 | 9 | ddpg_bootstrap: ddpg.d bootstrap.d $(MSRCS) Makefile 10 | dmd -ofddpg_bootstrap -version=bootstrap ddpg.d bootstrap.d $(MSRCS) 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Dunnart D Parser Generator (ddpg) 2 | ================================= 3 | 4 | Enhanced LALR(1) Parser Generator for the D Programming Language. 5 | 6 | This (ddpg) tool is an implementation in D of the LALR(1) parser generators 7 | described in "Compilers - Principles, Techniques and Tools" (Aho, Sethi 8 | and Ullman, 1986) (aka the red dragon book) with modifications to allow 9 | conflict resolution using boolean predicates and a built-in lexical 10 | analyser. It produces a single D file as output. 11 | 12 | The [specification language](https://github.com/pwil3058/dunnart/wiki/Specification-Language) 13 | for dunnart is itself described (in that same 14 | specification language) in the file dunnart.ddgs, which was 15 | used to implement the ddpg program recursively via a three stage 16 | bootstrap process (one stage now permanently retired). 17 | 18 | Building ddpg 19 | ============= 20 | 21 | To build __ddpg__ execute the following: 22 | ``` 23 | $ make 24 | ``` 25 | in the base directory.
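Once the build completes, the tool can be tried out on one of the bundled examples; the following is an illustrative invocation (the file names come from the examples directory listed above):
```
$ ./ddpg -f examples/calc/parser.ddgs
```
Following the rules described under "Output File Name" below, this writes the generated parser to examples/calc/parser.d; the -f flag permits an existing copy to be overwritten.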
26 | 27 | Synopsis 28 | ======== 29 | ``` 30 | ddpg [<options>] <input-file> 31 | Options: 32 | -f|--force: 33 | overwrite existing output file 34 | -m | --module=<name>: 35 | insert a "module" statement in the output file using name 36 | -v | --verbose: 37 | produce a full description of the grammar generated 38 | -o | --output=<path>: 39 | write the generated parser code to "path" 40 | -p | --prefix=<path>: 41 | if using a path name based on the module name prefix it with path 42 | -e | --expect=<number>: 43 | expect exactly "number" total conflicts 44 | -s | --states=<path>: 45 | write a textual description of the parser's states to "path" 46 | to assist in resolution of any conflicts 47 | 48 | ``` 49 | 50 | Output File Name 51 | ================ 52 | If the _--module_ option is used the output filename will be derived 53 | from the module name (prefixed with the argument to the _--prefix_ 54 | option if present) unless it is overruled by the _--output_ option. 55 | If neither the _--module_ nor _--output_ option is present, the output 56 | filename will be constructed by replacing the input filename's suffix 57 | with "d". 58 | 59 | In no event will an existing file be overwritten unless the _--force_ 60 | option is used. 61 | 62 | License 63 | ======= 64 | Dunnart D Parser Generator (ddpg) is licensed under the Boost Software 65 | License, Version 1.0. (See accompanying file LICENSE_1_0.txt or 66 | [copy at](http://www.boost.org/LICENSE_1_0.txt)). 67 | -------------------------------------------------------------------------------- /cli.d: -------------------------------------------------------------------------------- 1 | // cli.d 2 | // 3 | // Copyright Peter Williams 2013 . 4 | // 5 | // Distributed under the Boost Software License, Version 1.0. 6 | // (See accompanying file LICENSE_1_0.txt or copy at 7 | // http://www.boost.org/LICENSE_1_0.txt) 8 | 9 | module cli; 10 | 11 | import std.stdio; 12 | import std.file; 13 | import std.getopt; 14 | import std.regex; 15 | import std.path; 16 | 17 | bool verbose; 18 | bool force; 19 | string module_name; 20 | string input_file_path; 21 | string output_file_path; 22 | string state_file_path; 23 | string prefix_path; 24 | uint expected_number_of_conflicts; 25 | 26 | bool process_command_line(string[] args) 27 | { 28 | getopt(args, 29 | "f|force", &force, 30 | "m|module", &module_name, 31 | "v|verbose", &verbose, 32 | "o|output", &output_file_path, 33 | "p|prefix", &prefix_path, 34 | "e|expect", &expected_number_of_conflicts, 35 | "s|states", &state_file_path, 36 | ); 37 | if (args.length != 2) { 38 | print_usage(args[0]); 39 | return false; 40 | } 41 | input_file_path = args[1]; 42 | 43 | // if the output file path isn't specified then generate it 44 | if (output_file_path.length == 0) { 45 | if (module_name.length > 0) { 46 | output_file_path = module_file_path(module_name, prefix_path); 47 | if (!isValidPath(output_file_path)) { 48 | stderr.writefln("%s: is not a valid file path", output_file_path); 49 | return false; 50 | } 51 | } else { 52 | output_file_path = stripExtension(input_file_path) ~ ".d"; 53 | } 54 | } 55 | // Don't overwrite existing files without specific authorization 56 | if (!force && exists(output_file_path)) { 57 | stderr.writefln("%s: already exists: use --force (or -f) to overwrite", output_file_path); 58 | return false; 59 | } 60 | return true; 61 | } 62 | 63 | void print_usage(string command) 64 | { 65 | writefln("Usage: %s [--force|-f] [--verbose|-v] [(--module|-m)=<name>] [(--expect|-e)=<number>] [(--output|-o)=<path>] <input-file>", command); 66 | } 67 | 68 | string module_file_path(string module_name, string prefix_path) 69 | { 70 | static auto
split_re = regex(r"\."); 71 | string[] parts; 72 | if (prefix_path.length == 0) { 73 | return buildPath(split(module_name, split_re)) ~ ".d"; 74 | } else { 75 | return buildNormalizedPath([prefix_path] ~ split(module_name, split_re)) ~ ".d"; 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /ddlib/lexan.d: -------------------------------------------------------------------------------- 1 | // lexan.d 2 | // 3 | // Copyright Peter Williams 2014 . 4 | // 5 | // This file is part of grallina and dunnart. 6 | // 7 | // grallina and dunnart are free software; you can redistribute it and/or modify 8 | // it under the terms of the GNU Lesser General Public License 9 | // as published by the Free Software Foundation; either version 3 10 | // of the License, or (at your option) any later version, with 11 | // some exceptions, please read the COPYING file. 12 | // 13 | // grallina is distributed in the hope that it will be useful, 14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | // GNU Lesser General Public License for more details. 17 | // 18 | // You should have received a copy of the GNU Lesser General Public License 19 | // along with grallina; if not, write to the Free Software 20 | // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA 21 | /** 22 | * Provides a simple mechanism for creating lexical analysers. The lexicon 23 | * to be analysed is specified using a combination of literal lexemes 24 | * (for reserved words, operators, etc.), regex lexemes (for names, numbers, etc.) 25 | * and skip (white space) regexes. 26 | * 27 | * The resulting lexical analyser can be used to create two types of 28 | * token input range: 29 | * - a simple token input range for a single string 30 | * - an injectable input range which starts out with a single string 31 | * but may have arbitrary strings injected into it at any time, e.g. 32 | * implementation of C include files. 33 | * 34 | * The string being analysed and all of the state data for the progress 35 | * of the analysis are held within the generated input ranges, and a 36 | * single lexical analyser can be used to analyse several strings 37 | * simultaneously. I.e. only one copy of the analyser for any lexicon 38 | * is needed, and it acts as a token input range factory. 39 | * 40 | * The token input ranges are lightweight as they do not carry their 41 | * own copy of the specification, just a reference to the lexical analyser 42 | * which provides the necessary services via a defined interface. 43 | * 44 | * Copyright: Copyright Peter Williams 2014-. 45 | * 46 | * License: $(WEB gnu.org/licenses/lgpl.html, GNU Lesser General Public License 3.0). 47 | * 48 | * Authors: Peter Williams 49 | * 50 | * Source: $(DUNNARTSRC ddlib.lexan.d) 51 | */ 52 | 53 | module ddlib.lexan; 54 | 55 | import std.regex; 56 | import std.ascii; 57 | import std.string; 58 | 59 | /** 60 | * This is a base exception from which all Lexan exceptions are derived. 61 | * It provides a mechanism to catch any Lexan exception. 62 | */ 63 | class LexanException: Exception { 64 | this(string message, string file=__FILE__, size_t line=__LINE__, Throwable next=null) 65 | { 66 | super("LexAn Error: " ~ message, file, line, next); 67 | } 68 | } 69 | 70 | /** 71 | * This exception is thrown if two literal lexemes have the same pattern 72 | * and should not be caught.
It indicates that there is an error in the 73 | * specification that needs to be fixed before a usable lexical analyser 74 | * can be created. This will be thrown during the initialization phase. 75 | */ 76 | class LexanDuplicateLiteralPattern: LexanException { 77 | string duplicate_pattern; /// the duplicated pattern 78 | 79 | this(string name, string file=__FILE__, size_t line=__LINE__, Throwable next=null) 80 | { 81 | duplicate_pattern = name; 82 | super(format("Duplicated literal specification: \"%s\".", name), file, line, next); 83 | } 84 | } 85 | 86 | /** 87 | * This exception is thrown if two regex lexemes match the same string 88 | * and should not be caught. It indicates that there is an error in the 89 | * specification that needs to be fixed before a usable lexical analyser 90 | * can be created. It is thrown if such a situation arises while analysing 91 | * a string and will not be thrown during the initialization phase. 92 | */ 93 | class LexanMultipleRegexMatches(H): LexanException { 94 | string matched_text; /// the piece of text that was matched 95 | H[] handles; /// the handles of the regex lexemes that made the match 96 | CharLocation location; 97 | 98 | this(HandleAndText!(H)[] hats, CharLocation locn, string file=__FILE__, size_t line=__LINE__, Throwable next=null) 99 | { 100 | matched_text = hats[0].text; location = locn; 101 | foreach (hat; hats) { 102 | handles ~= hat.handle; 103 | } 104 | string msg = format("Regexes %s: all match \"%s\" at %s.", handles, matched_text, locn); 105 | super(msg, file, line, next); 106 | } 107 | } 108 | 109 | /** 110 | * This exception is raised during analysis of a provided text when 111 | * characters are encountered that: 112 | * - are not skipped by the skip regexes, 113 | * - do not match any of the literal lexemes' patterns, and 114 | * - are not matched by any of the regex lexemes. 115 | * It contains data about the nature and location of the problematic 116 | * characters and leaves the token input range that threw it in a usable 117 | * state in that it is in a position to provide a valid token or is empty. 118 | * 119 | * It is safe to catch this exception as it concerns an error in the 120 | * analysed text, not the lexical analyser, and the client can then either 121 | * abandon the analysis or continue analysing the remainder of the 122 | * text as best suits their needs. 123 | */ 124 | class LexanInvalidToken: LexanException { 125 | string unexpected_text; 126 | CharLocation location; 127 | 128 | this(string utext, CharLocation locn, string file=__FILE__, size_t line=__LINE__, Throwable next=null) 129 | { 130 | unexpected_text = utext; 131 | location = locn; 132 | string msg = format("Invalid Input: \"%s\" at %s.", utext, locn); 133 | super(msg, file, line, next); 134 | } 135 | } 136 | 137 | /** 138 | * A struct for defining literal lexemes.
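* Each lexeme pairs a caller chosen handle of type H (e.g. a member of a token enum) with the literal text it matches; the handle is what matching tokens report back to the client.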
139 | * Example: 140 | * --- 141 | * enum MyTokens { IF, THEN, ELSE, PLUS, VARName, ........} 142 | * 143 | * static auto my_literals = [ 144 | * LiteralLexeme!MyTokens(MyTokens.IF, "if"), 145 | * LiteralLexeme!MyTokens(MyTokens.THEN, "then"), 146 | * LiteralLexeme!MyTokens(MyTokens.ELSE, "else"), 147 | * LiteralLexeme!MyTokens(MyTokens.PLUS, "+"), 148 | * ]; 149 | * --- 150 | */ 151 | struct LiteralLexeme(H) { 152 | H handle; /// a unique handle for this lexeme 153 | string pattern; /// the text pattern that the lexeme represents 154 | 155 | @property 156 | size_t length() 157 | { 158 | return pattern.length; 159 | } 160 | 161 | @property 162 | bool is_valid() 163 | { 164 | return pattern.length > 0; 165 | } 166 | } 167 | unittest { 168 | LiteralLexeme!(int) el; 169 | assert(!el.is_valid); 170 | static auto ll = LiteralLexeme!(int)(6, "six"); 171 | assert(ll.is_valid); 172 | } 173 | 174 | struct RegexLexeme(H, RE) { 175 | H handle; 176 | RE re; 177 | 178 | @property 179 | bool is_valid() 180 | { 181 | return !re.empty; 182 | } 183 | } 184 | 185 | template CtRegexLexeme(H, H handle, string script) { 186 | static if (script[0] == '^') { 187 | enum CtRegexLexeme = RegexLexeme!(H, StaticRegex!char)(handle, ctRegex!(script)); 188 | } else { 189 | enum CtRegexLexeme = RegexLexeme!(H, StaticRegex!char)(handle, ctRegex!("^" ~ script)); 190 | } 191 | } 192 | 193 | template EtRegexLexeme(H, H handle, string script) { 194 | static if (script[0] == '^') { 195 | enum EtRegexLexeme = RegexLexeme!(H, Regex!char)(handle, regex(script)); 196 | } else { 197 | enum EtRegexLexeme = RegexLexeme!(H, Regex!char)(handle, regex("^" ~ script)); 198 | } 199 | } 200 | 201 | unittest { 202 | RegexLexeme!(int, StaticRegex!char) erel; 203 | assert(!erel.is_valid); 204 | static auto rel = RegexLexeme!(int, StaticRegex!char)(12, ctRegex!("^twelve")); 205 | assert(rel.is_valid); 206 | RegexLexeme!(int, Regex!char) edrel; 207 | assert(!edrel.is_valid); 208 | auto drel = RegexLexeme!(int, Regex!char)(12, regex("^twelve")); 209 | assert(drel.is_valid); 210 | auto tdrel = EtRegexLexeme!(int, 13, "^twelve"); 211 | } 212 | 213 | private class LiteralMatchNode(H) { 214 | long lexeme_index; 215 | LiteralMatchNode!(H)[char] tails; 216 | 217 | this(string str, long str_lexeme_index) 218 | { 219 | if (str.length == 0) { 220 | lexeme_index = str_lexeme_index; 221 | } else { 222 | lexeme_index = -1; 223 | tails[str[0]] = new LiteralMatchNode(str[1..$], str_lexeme_index); 224 | } 225 | } 226 | 227 | void add_tail(string new_tail, long nt_lexeme_index) 228 | { 229 | if (new_tail.length == 0) { 230 | if (lexeme_index >= 0) throw new LexanException(""); 231 | lexeme_index = nt_lexeme_index; 232 | } else if (new_tail[0] in tails) { 233 | tails[new_tail[0]].add_tail(new_tail[1..$], nt_lexeme_index); 234 | } else { 235 | tails[new_tail[0]] = new LiteralMatchNode(new_tail[1..$], nt_lexeme_index); 236 | } 237 | } 238 | } 239 | 240 | class LiteralMatcher(H) { 241 | private: 242 | LiteralLexeme!(H)[] lexemes; 243 | LiteralMatchNode!(H)[char] literals; 244 | 245 | public: 246 | this(ref LiteralLexeme!(H)[] lexeme_list) 247 | { 248 | lexemes = lexeme_list; 249 | for (auto i = 0; i < lexemes.length; i++) { 250 | auto lexeme = lexemes[i]; 251 | auto literal = lexeme.pattern; 252 | if (literal[0] in literals) { 253 | try { 254 | literals[literal[0]].add_tail(literal[1..$], i); 255 | } catch (LexanException edata) { 256 | throw new LexanDuplicateLiteralPattern(literal); 257 | } 258 | } else { 259 | literals[literal[0]] = new LiteralMatchNode!(H)(literal[1..$], i); 260 | } 261 | } 262 | }
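// The LiteralMatchNode tables above form a character trie: each node
// records the index of the lexeme (if any) whose pattern ends at that
// node plus a map from the next character to the nodes for longer
// patterns sharing that prefix. get_longest_match() below walks the
// trie one character at a time, remembering the last completed lexeme
// it passed, so with both "tes" and "test" defined the input "tests"
// matches "test".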
263 | 264 | LiteralLexeme!(H) get_longest_match(string target) 265 | { 266 | LiteralLexeme!(H) lvm; 267 | auto lits = literals; 268 | for (auto index = 0; index < target.length && target[index] in lits; index++) { 269 | if (lits[target[index]].lexeme_index >= 0) 270 | lvm = lexemes[lits[target[index]].lexeme_index]; 271 | lits = lits[target[index]].tails; 272 | } 273 | return lvm; 274 | } 275 | } 276 | unittest { 277 | import std.exception; 278 | auto test_strings = ["alpha", "beta", "gamma", "delta", "test", "tes", "tenth", "alpine", "gammon", "gamble"]; 279 | LiteralLexeme!int[] test_lexemes; 280 | for (auto i = 0; i < test_strings.length; i++) { 281 | test_lexemes ~= LiteralLexeme!int(i, test_strings[i]); 282 | } 283 | auto rubbish = "garbage"; 284 | auto lm = new LiteralMatcher!int(test_lexemes); 285 | foreach(test_string; test_strings) { 286 | assert(lm.get_longest_match(test_string).is_valid); 287 | assert(lm.get_longest_match(rubbish ~ test_string).is_valid == false); 288 | assert(lm.get_longest_match((rubbish ~ test_string)[rubbish.length..$]).is_valid == true); 289 | assert(lm.get_longest_match((rubbish ~ test_string ~ rubbish)[rubbish.length..$]).is_valid == true); 290 | assert(lm.get_longest_match(test_string ~ rubbish).is_valid == true); 291 | } 292 | foreach(test_string; test_strings) { 293 | assert(lm.get_longest_match(test_string).pattern == test_string); 294 | assert(lm.get_longest_match(rubbish ~ test_string).is_valid == false); 295 | assert(lm.get_longest_match((rubbish ~ test_string)[rubbish.length..$]).pattern == test_string); 296 | assert(lm.get_longest_match((rubbish ~ test_string ~ rubbish)[rubbish.length..$]).pattern == test_string); 297 | assert(lm.get_longest_match(test_string ~ rubbish).pattern == test_string); 298 | } 299 | auto bad_strings = test_strings ~ "gamma"; 300 | LiteralLexeme!int[] bad_lexemes; 301 | for (auto i = 0; i < bad_strings.length; i++) { 302 | bad_lexemes ~= LiteralLexeme!int(i, bad_strings[i]); 303 | } 304 | try { 305 | auto bad_lm = new LiteralMatcher!int(bad_lexemes); 306 | assert(false, "should blow up before here!"); 307 | } catch (LexanDuplicateLiteralPattern edata) { 308 | assert(edata.duplicate_pattern == "gamma"); 309 | } 310 | } 311 | 312 | struct CharLocation { 313 | // Line numbers and offsets both start at 1 (i.e. human friendly) 314 | // as these are used for error messages. 315 | size_t index; 316 | size_t line_number; 317 | size_t offset; 318 | string label; // e.g. 
name of file that text came from 319 | 320 | const string toString() 321 | { 322 | if (label.length > 0) { 323 | return format("%s:%s(%s)", label, line_number, offset); 324 | } else { 325 | return format("%s(%s)", line_number, offset); 326 | } 327 | } 328 | } 329 | 330 | class Token(H) { 331 | private: 332 | H _handle; 333 | string _matched_text; 334 | CharLocation _location; 335 | bool _is_valid_match; 336 | 337 | public: 338 | this(H handle, string text, CharLocation locn) 339 | { 340 | _handle = handle; 341 | _matched_text = text; 342 | _location = locn; 343 | _is_valid_match = true; 344 | } 345 | 346 | this(string text, CharLocation locn) 347 | { 348 | _matched_text = text; 349 | _location = locn; 350 | _is_valid_match = false; 351 | } 352 | 353 | @property 354 | H handle() 355 | { 356 | if (!_is_valid_match) throw new LexanInvalidToken(_matched_text, location); 357 | return _handle; 358 | } 359 | 360 | @property 361 | ref string matched_text() 362 | { 363 | return _matched_text; 364 | } 365 | 366 | @property 367 | CharLocation location() 368 | { 369 | return _location; 370 | } 371 | 372 | @property 373 | bool is_valid_match() 374 | { 375 | return _is_valid_match; 376 | } 377 | } 378 | 379 | struct HandleAndText(H) { 380 | H handle; 381 | string text; 382 | 383 | @property 384 | size_t length() 385 | { 386 | return text.length; 387 | } 388 | 389 | @property 390 | bool is_valid() 391 | { 392 | return text.length > 0; 393 | } 394 | } 395 | 396 | interface LexicalAnalyserIfce(H) { 397 | size_t get_skippable_count(string text); 398 | LiteralLexeme!(H) get_longest_literal_match(string text); 399 | HandleAndText!(H)[] get_longest_regex_match(string text); 400 | size_t distance_to_next_valid_input(string text); 401 | TokenForwardRange!(H) token_forward_range(string text, string label); 402 | TokenForwardRange!(H) token_forward_range(string text, string label, H end_handle); 403 | InjectableTokenForwardRange!(H) injectable_token_forward_range(string text, string label); 404 | InjectableTokenForwardRange!(H) injectable_token_forward_range(string text, string label, H end_handle); 405 | } 406 | 407 | class LexicalAnalyser(H, RE): LexicalAnalyserIfce!(H) { 408 | private LiteralMatcher!(H) literal_matcher; 409 | private RegexLexeme!(H, RE)[] re_lexemes; 410 | private RE[] skip_re_list; 411 | 412 | this(ref LiteralLexeme!(H)[] lit_lexemes, ref RegexLexeme!(H, RE)[] re_lexemes, ref RE[] skip_re_list) 413 | in { 414 | // Make sure handles are unique 415 | for (auto i = 0; i < (lit_lexemes.length - 1); i++) { 416 | for (auto j = i + 1; j < lit_lexemes.length; j++) { 417 | assert(lit_lexemes[i].handle != lit_lexemes[j].handle); 418 | } 419 | } 420 | for (auto i = 0; i < (re_lexemes.length - 1); i++) { 421 | for (auto j = i + 1; j < re_lexemes.length; j++) { 422 | assert(re_lexemes[i].handle != re_lexemes[j].handle); 423 | } 424 | } 425 | for (auto i = 0; i < lit_lexemes.length; i++) { 426 | for (auto j = 0; j < re_lexemes.length; j++) { 427 | assert(lit_lexemes[i].handle != re_lexemes[j].handle); 428 | } 429 | } 430 | } 431 | body { 432 | literal_matcher = new LiteralMatcher!(H)(lit_lexemes); 433 | this.re_lexemes = re_lexemes; 434 | this.skip_re_list = skip_re_list; 435 | } 436 | 437 | size_t get_skippable_count(string text) 438 | { 439 | size_t index = 0; 440 | while (index < text.length) { 441 | auto skips = 0; 442 | foreach (skip_re; skip_re_list) { 443 | auto m = match(text[index..$], skip_re); 444 | if (!m.empty) { 445 | index += m.hit.length; 446 | skips++; 447 | } 448 | } 449 | if (skips == 0) 
break; 450 | } 451 | return index; 452 | } 453 | 454 | LiteralLexeme!(H) get_longest_literal_match(string text) 455 | { 456 | return literal_matcher.get_longest_match(text); 457 | } 458 | 459 | HandleAndText!(H)[] get_longest_regex_match(string text) 460 | { 461 | HandleAndText!(H)[] hat; 462 | 463 | foreach (re_lexeme; re_lexemes) { 464 | auto m = match(text, re_lexeme.re); 465 | // TODO: check for two or more of the same length 466 | // and throw a wobbly 467 | if (m) { 468 | if (hat.length == 0 || hat[0].length == m.hit.length) { 469 | hat ~= HandleAndText!(H)(re_lexeme.handle, m.hit); 470 | } else if (m.hit.length > hat[0].length) { 471 | hat = [HandleAndText!(H)(re_lexeme.handle, m.hit)]; 472 | } 473 | } 474 | } 475 | 476 | return hat; 477 | } 478 | 479 | size_t distance_to_next_valid_input(string text) 480 | { 481 | size_t index = 0; 482 | mainloop: while (index < text.length) { 483 | // Assume that the front of the text is invalid 484 | // TODO: put in precondition to that effect 485 | index++; 486 | if (literal_matcher.get_longest_match(text[index..$]).is_valid) break; 487 | foreach (re_lexeme; re_lexemes) { 488 | if (match(text[index..$], re_lexeme.re)) break mainloop; 489 | } 490 | foreach (skip_re; skip_re_list) { 491 | if (match(text[index..$], skip_re)) break mainloop; 492 | } 493 | } 494 | return index; 495 | } 496 | 497 | TokenForwardRange!(H) token_forward_range(string text, string label) 498 | { 499 | return TokenForwardRange!(H)(this, text, label); 500 | } 501 | 502 | TokenForwardRange!(H) token_forward_range(string text, string label, H end_handle) 503 | { 504 | return TokenForwardRange!(H)(this, text, label, end_handle); 505 | } 506 | 507 | InjectableTokenForwardRange!(H) injectable_token_forward_range(string text, string label) 508 | { 509 | return InjectableTokenForwardRange!(H)(this, text, label); 510 | } 511 | 512 | InjectableTokenForwardRange!(H) injectable_token_forward_range(string text, string label, H end_handle) 513 | { 514 | return InjectableTokenForwardRange!(H)(this, text, label, end_handle); 515 | } 516 | } 517 | 518 | struct TokenForwardRange(H) { 519 | LexicalAnalyserIfce!(H) analyser; 520 | private string input_text; 521 | private CharLocation index_location; 522 | private Token!(H) current_match; 523 | private H end_handle; 524 | private bool send_end_of_input; 525 | 526 | this (LexicalAnalyserIfce!(H) analyser, string text, string label) 527 | { 528 | this.analyser = analyser; 529 | index_location = CharLocation(0, 1, 1, label); 530 | input_text = text; 531 | current_match = advance(); 532 | } 533 | 534 | this (LexicalAnalyserIfce!(H) analyser, string text, string label, H end_handle) 535 | { 536 | this.end_handle = end_handle; 537 | this.send_end_of_input = true; 538 | this(analyser, text, label); 539 | } 540 | 541 | private void incr_index_location(size_t length) 542 | { 543 | auto next_index = index_location.index + length; 544 | for (auto i = index_location.index; i < next_index; i++) { 545 | static if (newline.length == 1) { 546 | if (newline[0] == input_text[i]) { 547 | index_location.line_number++; 548 | index_location.offset = 1; 549 | } else { 550 | index_location.offset++; 551 | } 552 | } else { 553 | if (newline == input_text[i..i + newline.length]) { 554 | index_location.line_number++; 555 | index_location.offset = 0; 556 | } else { 557 | index_location.offset++; 558 | } 559 | } 560 | } 561 | index_location.index = next_index; 562 | } 563 | 564 | private Token!(H) advance() 565 | { 566 | while (index_location.index < input_text.length) { 567
| // skips have highest priority 568 | incr_index_location(analyser.get_skippable_count(input_text[index_location.index..$])); 569 | if (index_location.index >= input_text.length) break; 570 | 571 | // The reported location is for the first character of the match 572 | auto location = index_location; 573 | 574 | // Find longest match found by literal match or regex 575 | auto llm = analyser.get_longest_literal_match(input_text[index_location.index..$]); 576 | 577 | auto lrem = analyser.get_longest_regex_match(input_text[index_location.index..$]); 578 | 579 | if (llm.is_valid && (lrem.length == 0 || llm.length >= lrem[0].length)) { 580 | // if the matches are of equal length literal wins 581 | incr_index_location(llm.length); 582 | return new Token!(H)(llm.handle, llm.pattern, location); 583 | } else if (lrem.length == 1) { 584 | incr_index_location(lrem[0].length); 585 | return new Token!(H)(lrem[0].handle, lrem[0].text, location); 586 | } else if (lrem.length > 1) { 587 | incr_index_location(lrem[0].length); 588 | throw new LexanMultipleRegexMatches!(H)(lrem, location); 589 | } else { 590 | // Failure: send back the offending character(s) and location 591 | auto start = index_location.index; 592 | incr_index_location(analyser.distance_to_next_valid_input(input_text[index_location.index..$])); 593 | return new Token!(H)(input_text[start..index_location.index], location); 594 | } 595 | } 596 | 597 | if (send_end_of_input) { 598 | send_end_of_input = false; // so we don't send multiple end tokens 599 | return new Token!(H)(end_handle, "", index_location); 600 | } 601 | return null; 602 | } 603 | 604 | @property 605 | bool empty() 606 | { 607 | return current_match is null; 608 | } 609 | 610 | @property 611 | Token!(H) front() 612 | { 613 | return current_match; 614 | } 615 | 616 | void popFront() 617 | { 618 | current_match = advance(); 619 | } 620 | 621 | Token!(H) moveFront() 622 | { 623 | auto retval = current_match; 624 | current_match = advance(); 625 | return retval; 626 | } 627 | 628 | TokenForwardRange!(H) save() 629 | { 630 | return this; 631 | } 632 | } 633 | 634 | unittest { 635 | import std.exception; 636 | auto lit_lexemes = [ 637 | LiteralLexeme!string("IF", "if"), 638 | LiteralLexeme!string("WHEN", "when"), 639 | ]; 640 | auto re_lexemes = [ 641 | RegexLexeme!(string, Regex!char)("IDENT", regex("^[a-zA-Z]+[\\w_]*")), 642 | RegexLexeme!(string, Regex!char)("BTEXTL", regex(r"^&\{(.|[\n\r])*&\}")), 643 | RegexLexeme!(string, Regex!char)("PRED", regex(r"^\?\{(.|[\n\r])*\?\}")), 644 | RegexLexeme!(string, Regex!char)("LITERAL", regex("^(\"\\S+\")")), 645 | RegexLexeme!(string, Regex!char)("ACTION", regex(r"^(!\{(.|[\n\r])*?!\})")), 646 | RegexLexeme!(string, Regex!char)("PREDICATE", regex(r"^(\?\((.|[\n\r])*?\?\))")), 647 | RegexLexeme!(string, Regex!char)("CODE", regex(r"^(%\{(.|[\n\r])*?%\})")), 648 | RegexLexeme!(string, Regex!char)("MORSE", regex(r"^(%\{(.|[\n\r])*?%\})")), 649 | ]; 650 | auto skip_re_list = [ 651 | regex(r"^(/\*(.|[\n\r])*?\*/)"), // D multi line comment 652 | regex(r"^(//[^\n\r]*)"), // D EOL comment 653 | regex(r"^(\s+)"), // White space 654 | ]; 655 | auto laspec = new LexicalAnalyser!(string, Regex!char)(lit_lexemes, re_lexemes, skip_re_list); 656 | auto la = laspec.token_forward_range("if iffy\n \"quoted\" \"if\" \n9 $ \tname &{ one \n two &} and so ?{on?}", ""); 657 | auto m = la.front(); la.popFront(); 658 | assert(m.handle == "IF" && m.matched_text == "if" && m.location.line_number == 1); 659 | m = la.front(); la.popFront(); 660 | assert(m.handle == 
"IDENT" && m.matched_text == "iffy" && m.location.line_number == 1); 661 | m = la.front(); la.popFront(); 662 | assert(m.handle == "LITERAL" && m.matched_text == "\"quoted\"" && m.location.line_number == 2); 663 | m = la.front(); la.popFront(); 664 | assert(m.handle == "LITERAL" && m.matched_text == "\"if\"" && m.location.line_number == 2); 665 | m = la.front(); la.popFront(); 666 | assert(!m.is_valid_match && m.matched_text == "9" && m.location.line_number == 3); 667 | assertThrown!LexanInvalidToken(m.handle != "blah blah blah"); 668 | m = la.front(); la.popFront(); 669 | assert(!m.is_valid_match && m.matched_text == "$" && m.location.line_number == 3); 670 | m = la.front(); la.popFront(); 671 | assert(m.handle == "IDENT" && m.matched_text == "name" && m.location.line_number == 3); 672 | auto saved_la = la.save(); 673 | m = la.front(); la.popFront(); 674 | assert(m.handle == "BTEXTL" && m.matched_text == "&{ one \n two &}" && m.location.line_number == 3); 675 | m = la.front(); la.popFront(); 676 | assert(m.handle == "IDENT" && m.matched_text == "and" && m.location.line_number == 4); 677 | m = saved_la.moveFront(); 678 | assert(m.handle == "BTEXTL" && m.matched_text == "&{ one \n two &}" && m.location.line_number == 3); 679 | m = saved_la.moveFront(); 680 | assert(m.handle == "IDENT" && m.matched_text == "and" && m.location.line_number == 4); 681 | m = la.front(); la.popFront(); 682 | assert(m.handle == "IDENT" && m.matched_text == "so" && m.location.line_number == 4); 683 | m = la.front(); la.popFront(); 684 | assert(m.handle == "PRED" && m.matched_text == "?{on?}" && m.location.line_number == 4); 685 | assert(la.empty); 686 | m = saved_la.moveFront(); 687 | assert(m.handle == "IDENT" && m.matched_text == "so" && m.location.line_number == 4); 688 | m = saved_la.moveFront(); 689 | assert(m.handle == "PRED" && m.matched_text == "?{on?}" && m.location.line_number == 4); 690 | assert(saved_la.empty); 691 | la = laspec.token_forward_range(" 692 | some identifiers 693 | // a single line comment with \"quote\" 694 | some more identifiers. 695 | /* a 696 | multi line 697 | comment */ 698 | 699 | \"+=\" and more ids. 700 | \"\"\" 701 | and an action !{ some D code !} and a predicate ?( a boolean expression ?) 
702 | and some included code %{ 703 | kllkkkl 704 | hl;ll 705 | %} 706 | ", ""); 707 | m = la.front(); la.popFront(); 708 | assert(m.handle == "IDENT" && m.matched_text == "some" && m.location.line_number == 2); 709 | m = la.moveFront(); 710 | assert(m.handle == "IDENT" && m.matched_text == "identifiers" && m.location.line_number == 2); 711 | m = la.front(); la.popFront(); m = la.front(); la.popFront(); m = la.front(); la.popFront(); m = la.front(); la.popFront(); 712 | assert(!m.is_valid_match); 713 | m = la.front(); la.popFront(); 714 | assert(m.handle == "LITERAL" && m.matched_text == "\"+=\"" && m.location.line_number == 9); 715 | m = la.front(); la.popFront(); m = la.front(); la.popFront(); m = la.front(); la.popFront(); m = la.front(); la.popFront(); 716 | m = la.front(); la.popFront(); 717 | assert(m.handle == "LITERAL" && m.matched_text == "\"\"\"" && m.location.line_number == 10); 718 | m = la.front(); la.popFront(); m = la.front(); la.popFront(); m = la.front(); la.popFront(); 719 | m = la.front(); la.popFront(); 720 | assert(m.handle == "ACTION" && m.matched_text == "!{ some D code !}" && m.location.line_number == 11); 721 | m = la.front(); la.popFront(); m = la.front(); la.popFront(); m = la.front(); la.popFront(); 722 | m = la.front(); la.popFront(); 723 | assert(m.handle == "PREDICATE" && m.matched_text == "?( a boolean expression ?)" && m.location.line_number == 11); 724 | try { 725 | m = la.front(); la.popFront(); m = la.front(); la.popFront(); m = la.front(); la.popFront(); m = la.front(); la.popFront(); 726 | m = la.front(); la.popFront(); 727 | assert(m.handle == "CODE" && m.matched_text == "%{\n kllkkkl\n hl;ll\n%}" && m.location.line_number == 12); 728 | assert(false, "Blows up before here!"); 729 | } catch (LexanMultipleRegexMatches!(string) edata) { 730 | assert(edata.handles == ["CODE", "MORSE"]); 731 | } 732 | auto ilit_lexemes = [ 733 | LiteralLexeme!int(0, "if"), 734 | LiteralLexeme!int(8, "when"), 735 | ]; 736 | auto ire_lexemes = [ 737 | RegexLexeme!(int, Regex!char)(1, regex("^[a-zA-Z]+[\\w_]*")), 738 | RegexLexeme!(int, Regex!char)(2, regex(r"^&\{(.|[\n\r])*&\}")), 739 | RegexLexeme!(int, Regex!char)(3, regex(r"^\?\{(.|[\n\r])*\?\}")), 740 | RegexLexeme!(int, Regex!char)(4, regex("^(\"\\S+\")")), 741 | RegexLexeme!(int, Regex!char)(5, regex(r"^(!\{(.|[\n\r])*?!\})")), 742 | RegexLexeme!(int, Regex!char)(6, regex(r"^(\?\((.|[\n\r])*?\?\))")), 743 | RegexLexeme!(int, Regex!char)(7, regex(r"^(%\{(.|[\n\r])*?%\})")), 744 | ]; 745 | auto ilaspec = new LexicalAnalyser!(int, Regex!char)(ilit_lexemes, ire_lexemes, skip_re_list); 746 | auto ila = ilaspec.token_forward_range("if iffy\n \"quoted\" $! %%name \"if\" \n9 $ \tname &{ one \n two &} and so ?{on?}", ""); 747 | auto im = ila.front(); ila.popFront(); 748 | assert(im.handle == 0 && im.matched_text == "if" && im.location.line_number == 1); 749 | im = ila.front(); ila.popFront(); 750 | assert(im.handle == 1 && im.matched_text == "iffy" && im.location.line_number == 1); 751 | im = ila.front(); ila.popFront(); 752 | assert(im.handle == 4 && im.matched_text == "\"quoted\"" && im.location.line_number == 2); 753 | im = ila.moveFront(); 754 | assert(!im.is_valid_match && im.matched_text == "$!" 
&& im.location.line_number == 2); 755 | im = ila.front(); ila.popFront(); 756 | assert(!im.is_valid_match && im.matched_text == "%%" && im.location.line_number == 2); 757 | } 758 | 759 | struct InjectableTokenForwardRange(H) { 760 | LexicalAnalyserIfce!(H) analyser; 761 | TokenForwardRange!(H)[] token_range_stack; 762 | 763 | this (LexicalAnalyserIfce!(H) analyser, string text, string label) 764 | { 765 | this.analyser = analyser; 766 | token_range_stack ~= analyser.token_forward_range(text, label); 767 | } 768 | 769 | this (LexicalAnalyserIfce!(H) analyser, string text, string label, H end_handle) 770 | { 771 | this.analyser = analyser; 772 | token_range_stack ~= analyser.token_forward_range(text, label, end_handle); 773 | } 774 | 775 | void inject(string text, string label) 776 | { 777 | token_range_stack ~= analyser.token_forward_range(text, label); 778 | } 779 | 780 | @property 781 | bool empty() 782 | { 783 | return token_range_stack.length == 0; 784 | } 785 | 786 | @property 787 | Token!(H) front() 788 | { 789 | if (token_range_stack.length == 0) return null; 790 | return token_range_stack[$ - 1].current_match; 791 | } 792 | 793 | void popFront() 794 | { 795 | token_range_stack[$ - 1].popFront(); 796 | while (token_range_stack.length > 0 && token_range_stack[$ - 1].empty) token_range_stack.length--; 797 | } 798 | 799 | Token!(H) moveFront() 800 | { 801 | auto retval = front; 802 | popFront(); 803 | return retval; 804 | } 805 | 806 | InjectableTokenForwardRange!(H) save() 807 | { 808 | InjectableTokenForwardRange!(H) retval; 809 | retval.analyser = analyser; 810 | foreach (token_range; token_range_stack) { 811 | retval.token_range_stack ~= token_range.save(); 812 | } 813 | return retval; 814 | } 815 | } 816 | unittest { 817 | auto lit_lexemes = [ 818 | LiteralLexeme!string("IF", "if"), 819 | LiteralLexeme!string("WHEN", "when"), 820 | ]; 821 | auto re_lexemes = [ 822 | RegexLexeme!(string, Regex!char)("IDENT", regex("^[a-zA-Z]+[\\w_]*")), 823 | RegexLexeme!(string, Regex!char)("BTEXTL", regex(r"^&\{(.|[\n\r])*&\}")), 824 | RegexLexeme!(string, Regex!char)("PRED", regex(r"^\?\{(.|[\n\r])*\?\}")), 825 | RegexLexeme!(string, Regex!char)("LITERAL", regex("^(\"\\S+\")")), 826 | RegexLexeme!(string, Regex!char)("ACTION", regex(r"^(!\{(.|[\n\r])*?!\})")), 827 | RegexLexeme!(string, Regex!char)("PREDICATE", regex(r"^(\?\((.|[\n\r])*?\?\))")), 828 | RegexLexeme!(string, Regex!char)("CODE", regex(r"^(%\{(.|[\n\r])*?%\})")), 829 | ]; 830 | auto skip_re_list = [ 831 | regex(r"^(/\*(.|[\n\r])*?\*/)"), // D multi line comment 832 | regex(r"^(//[^\n\r]*)"), // D EOL comment 833 | regex(r"^(\s+)"), // White space 834 | ]; 835 | auto laspec = new LexicalAnalyser!(string, Regex!char)(lit_lexemes, re_lexemes, skip_re_list); 836 | auto ila = laspec.injectable_token_forward_range("if iffy\n \"quoted\" \"if\" \n9 $ \tname &{ one \n two &} and so ?{on?}", "one"); 837 | auto m = ila.front(); ila.popFront(); 838 | assert(m.handle == "IF" && m.matched_text == "if" && m.location.line_number == 1); 839 | m = ila.front(); ila.popFront(); 840 | assert(m.handle == "IDENT" && m.matched_text == "iffy" && m.location.line_number == 1); 841 | m = ila.front(); ila.popFront(); 842 | assert(m.handle == "LITERAL" && m.matched_text == "\"quoted\"" && m.location.line_number == 2); 843 | m = ila.moveFront(); 844 | assert(m.handle == "LITERAL" && m.matched_text == "\"if\"" && m.location.line_number == 2); 845 | m = ila.front(); ila.popFront(); 846 | assert(!m.is_valid_match && m.matched_text == "9" && m.location.line_number == 
3); 847 | ila.inject("if one \"name\"", "two"); 848 | m = ila.front(); ila.popFront(); 849 | assert(m.handle == "IF" && m.matched_text == "if" && m.location.line_number == 1 && m.location.label == "two"); 850 | auto saved_ila = ila.save(); 851 | m = ila.front(); ila.popFront(); 852 | assert(m.handle == "IDENT" && m.matched_text == "one" && m.location.line_number == 1 && m.location.label == "two"); 853 | m = ila.front(); ila.popFront(); 854 | assert(m.handle == "LITERAL" && m.matched_text == "\"name\"" && m.location.line_number == 1 && m.location.label == "two"); 855 | m = ila.front(); ila.popFront(); 856 | assert(!m.is_valid_match && m.matched_text == "$" && m.location.line_number == 3); 857 | m = ila.front(); ila.popFront(); 858 | assert(m.handle == "IDENT" && m.matched_text == "name" && m.location.line_number == 3); 859 | m = saved_ila.moveFront(); 860 | assert(m.handle == "IDENT" && m.matched_text == "one" && m.location.line_number == 1 && m.location.label == "two"); 861 | m = saved_ila.moveFront(); 862 | assert(m.handle == "LITERAL" && m.matched_text == "\"name\"" && m.location.line_number == 1 && m.location.label == "two"); 863 | m = saved_ila.moveFront(); 864 | assert(!m.is_valid_match && m.matched_text == "$" && m.location.line_number == 3); 865 | m = saved_ila.moveFront(); 866 | assert(m.handle == "IDENT" && m.matched_text == "name" && m.location.line_number == 3); 867 | m = ila.front(); ila.popFront(); 868 | assert(m.handle == "BTEXTL" && m.matched_text == "&{ one \n two &}" && m.location.line_number == 3); 869 | m = ila.front(); ila.popFront(); 870 | assert(m.handle == "IDENT" && m.matched_text == "and" && m.location.line_number == 4); 871 | m = ila.front(); ila.popFront(); 872 | assert(m.handle == "IDENT" && m.matched_text == "so" && m.location.line_number == 4); 873 | m = ila.front(); ila.popFront(); 874 | assert(m.handle == "PRED" && m.matched_text == "?{on?}" && m.location.line_number == 4); 875 | assert(ila.empty); 876 | m = saved_ila.moveFront(); 877 | assert(m.handle == "BTEXTL" && m.matched_text == "&{ one \n two &}" && m.location.line_number == 3); 878 | m = saved_ila.moveFront(); 879 | assert(m.handle == "IDENT" && m.matched_text == "and" && m.location.line_number == 4); 880 | m = saved_ila.moveFront(); 881 | assert(m.handle == "IDENT" && m.matched_text == "so" && m.location.line_number == 4); 882 | m = saved_ila.moveFront(); 883 | assert(m.handle == "PRED" && m.matched_text == "?{on?}" && m.location.line_number == 4); 884 | assert(saved_ila.empty); 885 | } 886 | -------------------------------------------------------------------------------- /ddlib/templates.d: -------------------------------------------------------------------------------- 1 | // templates.d 2 | // 3 | // Copyright Peter Williams 2013 . 4 | // 5 | // This file is part of grallina and dunnart. 6 | // 7 | // grallina and dunnart are free software; you can redistribute it and/or modify 8 | // it under the terms of the GNU Lesser General Public License 9 | // as published by the Free Software Foundation; either version 3 10 | // of the License, or (at your option) any later version, with 11 | // some exceptions, please read the COPYING file. 12 | // 13 | // grallina is distributed in the hope that it will be useful, 14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | // GNU Lesser General Public License for more details. 
17 | // 18 | // You should have received a copy of the GNU Lesser General Public License 19 | // along with grallina; if not, write to the Free Software 20 | // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA 21 | 22 | module ddlib.templates; 23 | 24 | mixin template DDParserSupport() { 25 | import std.conv; 26 | import std.string; 27 | import std.stdio; 28 | import std.regex; 29 | 30 | import ddlexan = ddlib.lexan; 31 | 32 | alias ddlexan.LiteralLexeme!DDHandle DDLiteralLexeme; 33 | template DDRegexLexeme(DDHandle handle, string script) { 34 | static if (script[0] == '^') { 35 | enum DDRegexLexeme = ddlexan.RegexLexeme!(DDHandle, Regex!char)(handle, regex(script)); 36 | } else { 37 | enum DDRegexLexeme = ddlexan.RegexLexeme!(DDHandle, Regex!char)(handle, regex("^" ~ script)); 38 | } 39 | } 40 | alias ddlexan.LexicalAnalyser!(DDHandle, Regex!char) DDLexicalAnalyser; 41 | alias ddlexan.CharLocation DDCharLocation; 42 | alias ddlexan.Token!DDHandle DDToken; 43 | 44 | enum DDParseActionType { SHIFT, REDUCE, ACCEPT } 45 | struct DDParseAction { 46 | DDParseActionType action; 47 | union { 48 | DDProduction production_id; 49 | DDParserState next_state; 50 | } 51 | } 52 | 53 | template dd_shift(DDParserState dd_state) { 54 | enum dd_shift = DDParseAction(DDParseActionType.SHIFT, dd_state); 55 | } 56 | 57 | template dd_reduce(DDProduction dd_production) { 58 | enum dd_reduce = DDParseAction(DDParseActionType.REDUCE, dd_production); 59 | } 60 | 61 | template dd_accept() { 62 | enum dd_accept = DDParseAction(DDParseActionType.ACCEPT, 0); 63 | } 64 | 65 | struct DDProductionData { 66 | DDNonTerminal left_hand_side; 67 | size_t length; 68 | } 69 | 70 | class DDSyntaxError: Exception { 71 | DDHandle[] expected_tokens; 72 | 73 | this(DDHandle[] expected_tokens, string file=__FILE__, size_t line=__LINE__, Throwable next=null) 74 | { 75 | this.expected_tokens = expected_tokens; 76 | string msg = format("Syntax Error: expected %s.", expected_tokens); 77 | super(msg, file, line, next); 78 | } 79 | } 80 | 81 | class DDSyntaxErrorData { 82 | DDHandle unexpected_token; 83 | string matched_text; 84 | DDCharLocation location; 85 | DDHandle[] expected_tokens; 86 | long skipped_count; 87 | 88 | this(DDHandle dd_token, DDAttributes dd_attrs, DDHandle[] dd_token_list) 89 | { 90 | unexpected_token = dd_token; 91 | matched_text = dd_attrs.dd_matched_text; 92 | location = dd_attrs.dd_location; 93 | expected_tokens = dd_token_list; 94 | skipped_count = -1; 95 | } 96 | 97 | override string toString() 98 | { 99 | string str; 100 | if (unexpected_token == DDHandle.ddINVALID_TOKEN) { 101 | str = format("%s: Unexpected input: %s", location, matched_text); 102 | } else { 103 | str = format("%s: Syntax Error: ", location); 104 | if (unexpected_token == DDHandle.ddEND) { 105 | str ~= "unexpected end of input: "; 106 | } else { 107 | auto literal = dd_literal_token_string(unexpected_token); 108 | if (literal is null) { 109 | str ~= format("found %s (\"%s\"): ", unexpected_token, matched_text); 110 | } else { 111 | str ~= format("found \"%s\": ", literal); 112 | } 113 | } 114 | str ~= format("expected %s.", expected_tokens_as_string()); 115 | } 116 | return str; 117 | } 118 | 119 | string expected_tokens_as_string() 120 | { 121 | auto str = dd_literal_token_string(expected_tokens[0]); 122 | if (str is null) { 123 | str = to!(string)(expected_tokens[0]); 124 | } else { 125 | str = format("\"%s\"", str); 126 | } 127 | for (auto i = 1; i < expected_tokens.length - 1; i++) { 128 | auto literal =
dd_literal_token_string(expected_tokens[i]); 129 | if (literal is null) { 130 | str ~= format(", %s", to!(string)(expected_tokens[i])); 131 | } else { 132 | str ~= format(", \"%s\"", literal); 133 | } 134 | } 135 | if (expected_tokens.length > 1) { 136 | auto literal = dd_literal_token_string(expected_tokens[$ - 1]); 137 | if (literal is null) { 138 | str ~= format(" or %s", to!(string)(expected_tokens[$ - 1])); 139 | } else { 140 | str ~= format(" or \"%s\"", literal); 141 | } 142 | } 143 | return str; 144 | } 145 | } 146 | } 147 | 148 | mixin template DDImplementParser() { 149 | import std.stdio; 150 | 151 | struct DDParseStack { 152 | struct StackElement { 153 | DDSymbol symbol_id; 154 | DDParserState state; 155 | } 156 | static const STACK_LENGTH_INCR = 100; 157 | StackElement[] state_stack; 158 | DDAttributes[] attr_stack; 159 | size_t height; 160 | DDParserState last_error_state; 161 | 162 | invariant() { 163 | assert(state_stack.length == attr_stack.length); 164 | assert(height <= state_stack.length); 165 | } 166 | 167 | private @property 168 | size_t index() 169 | { 170 | return height - 1; 171 | } 172 | 173 | private @property 174 | DDParserState current_state() 175 | { 176 | return state_stack[index].state; 177 | } 178 | 179 | private @property 180 | DDSymbol top_symbol() 181 | { 182 | return state_stack[index].symbol_id; 183 | } 184 | 185 | private @property 186 | ref DDAttributes top_attributes() 187 | { 188 | return attr_stack[index]; 189 | } 190 | 191 | private @property 192 | DDAttributes[] attributes_stack() 193 | { 194 | return attr_stack[0..height]; 195 | } 196 | 197 | private 198 | void push(DDSymbol symbol_id, DDParserState state) 199 | { 200 | height += 1; 201 | if (height >= state_stack.length) { 202 | state_stack ~= new StackElement[STACK_LENGTH_INCR]; 203 | attr_stack ~= new DDAttributes[STACK_LENGTH_INCR]; 204 | } 205 | state_stack[index] = StackElement(symbol_id, state); 206 | } 207 | 208 | private 209 | void push(DDHandle dd_token, DDParserState state, DDAttributes attrs) 210 | { 211 | push(dd_token, state); 212 | attr_stack[index] = attrs; 213 | last_error_state = 0; // Reset the last error state on shift 214 | } 215 | 216 | private 217 | void push(DDSymbol symbol_id, DDParserState state, DDSyntaxErrorData error_data) 218 | { 219 | push(symbol_id, state); 220 | attr_stack[index].dd_syntax_error_data = error_data; 221 | } 222 | 223 | private 224 | DDAttributes[] pop(size_t count) 225 | { 226 | if (count == 0) return []; 227 | height -= count; 228 | return attr_stack[height..height + count].dup; 229 | } 230 | 231 | private 232 | void reduce(DDProduction production_id, void delegate(string, string) dd_inject) 233 | { 234 | auto productionData = dd_get_production_data(production_id); 235 | auto attrs = pop(productionData.length); 236 | auto nextState = dd_get_goto_state(productionData.left_hand_side, current_state); 237 | push(productionData.left_hand_side, nextState); 238 | dd_do_semantic_action(top_attributes, production_id, attrs, dd_inject); 239 | } 240 | 241 | private 242 | int find_viable_recovery_state(DDHandle current_token) 243 | { 244 | int distance_to_viable_state = 0; 245 | while (distance_to_viable_state < height) { 246 | auto candidate_state = state_stack[index - distance_to_viable_state].state; 247 | if (candidate_state != last_error_state && dd_error_recovery_ok(candidate_state, current_token)) { 248 | last_error_state = candidate_state; 249 | return distance_to_viable_state; 250 | } 251 | distance_to_viable_state++; 252 | } 253 | return -1; /// 
Failure 254 | } 255 | } 256 | 257 | bool dd_parse_text(string text, string label="") 258 | { 259 | auto tokens = dd_lexical_analyser.injectable_token_forward_range(text, label, DDHandle.ddEND); 260 | auto parse_stack = DDParseStack(); 261 | parse_stack.push(DDNonTerminal.ddSTART, 0); 262 | auto token = tokens.moveFront(); 263 | with (parse_stack) with (DDParseActionType) { 264 | try_again: 265 | try { 266 | while (true) { 267 | auto next_action = dd_get_next_action(current_state, token.handle, attributes_stack); 268 | while (next_action.action == REDUCE) { 269 | reduce(next_action.production_id, &tokens.inject); 270 | next_action = dd_get_next_action(current_state, token.handle, attributes_stack); 271 | } 272 | if (next_action.action == SHIFT) { 273 | push(token.handle, next_action.next_state, DDAttributes(token)); 274 | token = tokens.moveFront(); 275 | } else if (next_action.action == ACCEPT) { 276 | return true; 277 | } 278 | } 279 | } catch (ddlexan.LexanInvalidToken edata) { 280 | token = new DDToken(DDHandle.ddINVALID_TOKEN, edata.unexpected_text, edata.location); 281 | goto try_again; 282 | } catch (DDSyntaxError edata) { 283 | auto error_data = new DDSyntaxErrorData(token.handle, DDAttributes(token), edata.expected_tokens); 284 | auto distance_to_viable_state = find_viable_recovery_state(token.handle); 285 | while (distance_to_viable_state < 0 && token.handle != DDHandle.ddEND) { 286 | token = tokens.moveFront(); 287 | error_data.skipped_count++; 288 | distance_to_viable_state = find_viable_recovery_state(token.handle); 289 | } 290 | if (distance_to_viable_state >= 0) { 291 | pop(distance_to_viable_state); 292 | push(DDNonTerminal.ddERROR, dd_get_goto_state(DDNonTerminal.ddERROR, current_state), error_data); 293 | goto try_again; 294 | } else { 295 | stderr.writeln(error_data); 296 | return false; 297 | } 298 | } 299 | } 300 | stderr.writeln("Unexpected end of input."); 301 | return false; 302 | } 303 | } 304 | -------------------------------------------------------------------------------- /ddpg.d: -------------------------------------------------------------------------------- 1 | #!/usr/bin/rdmd 2 | // ddpg.d 3 | // 4 | // Copyright Peter Williams 2013 . 5 | // 6 | // Distributed under the Boost Software License, Version 1.0. 
7 | // (See accompanying file LICENSE_1_0.txt or copy at 8 | // http://www.boost.org/LICENSE_1_0.txt) 9 | 10 | import std.stdio; 11 | import std.getopt; 12 | import std.file; 13 | import std.utf; 14 | 15 | version (bootstrap) { 16 | import bootstrap; 17 | } else { 18 | import dunnart; 19 | } 20 | import grammar; 21 | import cli; 22 | import errors; 23 | 24 | int main(string[] args) 25 | { 26 | if (!process_command_line(args)) { 27 | return 1; 28 | } 29 | // Read the text to be parsed 30 | string input_text; 31 | try { 32 | input_text = readText(input_file_path); 33 | } catch (FileException e) { 34 | auto msg = extract_file_exception_msg(e); 35 | writeln(msg); 36 | return 2; 37 | } catch (UTFException e) { 38 | writefln("%s: not a valid text file: %s", input_file_path, e); 39 | return 3; 40 | } 41 | // Parse the text and generate the grammar specification 42 | auto grammar_specification = parse_specification_text(input_text, input_file_path); 43 | if (grammar_specification is null) return 4; 44 | if (verbose) { 45 | writeln("Grammar Specification\n"); 46 | foreach (text_line; grammar_specification.get_description()) { 47 | writeln(text_line); 48 | } 49 | } 50 | // warn about unused symbols 51 | foreach (unused_symbol; grammar_specification.symbol_table.get_unused_symbols()) { 52 | warning(unused_symbol.defined_at, "Symbol \"%s\" is not used", unused_symbol); 53 | } 54 | // undefined symbols are fatal errors 55 | foreach (undefined_symbol; grammar_specification.symbol_table.get_undefined_symbols()) { 56 | foreach (locn; undefined_symbol.used_at) { 57 | error(locn, "Symbol \"%s\" is not defined", undefined_symbol); 58 | } 59 | } 60 | if (error_count > 0) { 61 | stderr.writefln("Too many (%s) errors aborting", error_count); 62 | return 5; 63 | } 64 | // Generate the grammar from the specification 65 | auto grammar = new Grammar(grammar_specification); 66 | if (grammar is null) return 6; 67 | if (state_file_path) { 68 | try { 69 | auto state_file = File(state_file_path, "w"); 70 | state_file.write(grammar.get_parser_states_description()); 71 | } catch (Exception e) { 72 | writeln(e); 73 | } 74 | } else if (verbose) { 75 | writeln("\nGrammar"); 76 | writeln(grammar.get_parser_states_description()); 77 | } 78 | if (grammar.total_unresolved_conflicts > 0) { 79 | foreach (parser_state; grammar.parser_states) { 80 | foreach (src; parser_state.shift_reduce_conflicts) { 81 | writefln("State<%s>: shift/reduce conflict on token: %s", parser_state.id, src.shift_symbol); 82 | } 83 | foreach (rrc; parser_state.reduce_reduce_conflicts) { 84 | writefln("State<%s>: reduce/reduce conflict on token(s): %s", parser_state.id, rrc.look_ahead_set_intersection); 85 | } 86 | } 87 | if (grammar.total_unresolved_conflicts != expected_number_of_conflicts) { 88 | stderr.writefln("Unexpected conflicts (%s) aborting", grammar.total_unresolved_conflicts); 89 | return 7; 90 | } 91 | } 92 | try { 93 | auto output_file = File(output_file_path, "w"); 94 | grammar.write_parser_code(output_file, module_name); 95 | } catch (Exception e) { 96 | writeln(e); 97 | return 8; 98 | } 99 | return 0; 100 | } 101 | -------------------------------------------------------------------------------- /dunnart.ddgs: -------------------------------------------------------------------------------- 1 | // dunnart.ddgs 2 | // 3 | // Copyright Peter Williams 2013 . 4 | // 5 | // Distributed under the Boost Software License, Version 1.0. 
6 | // (See accompanying file LICENSE_1_0.txt or copy at 7 | // http://www.boost.org/LICENSE_1_0.txt) 8 | 9 | // dunnart specification for dunnart grammar specification language 10 | 11 | %{ 12 | // dunnart.d 13 | // 14 | // Copyright Peter Williams 2013 . 15 | // 16 | // Distributed under the Boost Software License, Version 1.0. 17 | // (See accompanying file LICENSE_1_0.txt or copy at 18 | // http://www.boost.org/LICENSE_1_0.txt) 19 | %} 20 | 21 | %{ 22 | import std.stdio; 23 | import std.file; 24 | 25 | import ddlib.lexan; 26 | import symbols; 27 | import grammar; 28 | 29 | struct ProductionTail { 30 | Symbol[] right_hand_side; 31 | AssociativePrecedence associative_precedence; 32 | Predicate predicate; 33 | SemanticAction action; 34 | } 35 | 36 | // Aliases for use in field definitions 37 | alias ProductionTail[] ProductionTailList; 38 | alias Symbol[] SymbolList; 39 | alias string[] StringList; 40 | 41 | uint error_count; 42 | uint warning_count; 43 | 44 | void message(T...)(const CharLocation locn, const string tag, const string format, T args) 45 | { 46 | stderr.writef("%s:%s:", locn, tag); 47 | stderr.writefln(format, args); 48 | stderr.flush(); 49 | } 50 | 51 | void warning(T...)(const CharLocation locn, const string format, T args) 52 | { 53 | message(locn, "Warning", format, args); 54 | warning_count++; 55 | } 56 | 57 | void error(T...)(const CharLocation locn, const string format, T args) 58 | { 59 | message(locn, "Error", format, args); 60 | error_count++; 61 | } 62 | 63 | GrammarSpecification parse_specification_text(string text, string label="") { 64 | auto grammar_specification = new GrammarSpecification(); 65 | %} 66 | 67 | %field Symbol symbol 68 | %field SymbolList symbol_list 69 | %field Predicate predicate 70 | %field SemanticAction semantic_action 71 | %field AssociativePrecedence associative_precedence 72 | %field ProductionTail production_tail 73 | %field ProductionTailList production_tail_list 74 | %field StringList string_list 75 | 76 | %token REGEX (\(.+\)(?=\s)) 77 | %token LITERAL ("(\\"|[^"\t\r\n\v\f])*") 78 | %token TOKEN "%token" 79 | %token FIELD "%field" 80 | %token LEFT "%left" 81 | %token RIGHT "%right" 82 | %token NONASSOC "%nonassoc" 83 | %token PRECEDENCE "%prec" 84 | %token SKIP "%skip" 85 | %token ERROR "%error" 86 | %token INJECT "%inject" 87 | %token NEWSECTION "%%" 88 | %token COLON ":" 89 | %token VBAR "|" 90 | %token DOT "." 91 | %token IDENT ([a-zA-Z]+[a-zA-Z0-9_]*) 92 | %token FIELDNAME (<[a-zA-Z]+[a-zA-Z0-9_]*>) 93 | %token PREDICATE (\?\((.|[\n\r])*?\?\)) 94 | %token ACTION (!\{(.|[\n\r])*?!\}) 95 | %token DCODE (%\{(.|[\n\r])*?%\}) 96 | 97 | %skip (/\*(.|[\n\r])*?\*/) 98 | %skip (//[^\n\r]*) 99 | %skip (\s+) 100 | 101 | %right INJECT 102 | 103 | %% 104 | specification: preamble definitions "%%" production_rules coda. 105 | 106 | oinjection: !{ // no injection so nothing to do !} | injection . 107 | 108 | injection_head: "%inject" LITERAL 109 | !{ 110 | auto file_path = $2.dd_matched_text[1..$ - 1]; 111 | try { 112 | auto text = readText(file_path); 113 | if (text.length > 0) { 114 | dd_inject(text, file_path); 115 | } else { 116 | warning($2.dd_location, "Injected file \"%s\" is empty.", file_path); 117 | } 118 | } catch (FileException e) { 119 | error($2.dd_location, "Injecting: %s.", e.msg); 120 | } 121 | !} 122 | . 123 | 124 | // NB This (including the split) is needed to allow for the "look ahead" mechanism. 125 | // It ensures injection occurs before any meaningful tokens are read. 126 | injection: injection_head "." .
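// For example, a specification can pull shared definitions in from another
// file with a declaration such as:
//
//     %inject "tokens.ddgsi" .
//
// (cf. the examples/test_inject directory, which splits its grammar across
// several .ddgsi fragments); the injected text is lexed in place, exactly as
// if it had appeared at the point of the %inject declaration.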
127 | 128 | // Preamble 129 | preamble: 130 | !{ 131 | // no preamble defined so there's nothing to do 132 | !} 133 | | oinjection DCODE oinjection 134 | !{ 135 | grammar_specification.set_preamble($2.dd_matched_text[2..$ - 2]); 136 | !} 137 | | oinjection DCODE oinjection DCODE oinjection 138 | !{ 139 | grammar_specification.set_header($2.dd_matched_text[2..$ - 2]); 140 | grammar_specification.set_preamble($4.dd_matched_text[2..$ - 2]); 141 | !} 142 | . 143 | 144 | // Coda 145 | coda: 146 | !{ 147 | // no coda defined so there's nothing to do 148 | !} 149 | | oinjection DCODE 150 | !{ 151 | grammar_specification.set_coda($2.dd_matched_text[2..$ - 2]); 152 | !} 153 | . 154 | 155 | definitions : field_definitions token_definitions skip_definitions precedence_definitions . 156 | 157 | // Field definitions 158 | field_definitions : // empty production 159 | !{ 160 | // do nothing 161 | !} 162 | | field_definitions oinjection field_definition oinjection 163 | . 164 | 165 | field_definition: "%field" field_type field_name 166 | !{ 167 | if (grammar_specification.symbol_table.is_known_field($3.dd_matched_text)) { 168 | auto previous = grammar_specification.symbol_table.get_field_defined_at($3.dd_matched_text); 169 | error($3.dd_location, "\"%s\" already declared at line %s.", $3.dd_matched_text, previous.line_number); 170 | } else { 171 | grammar_specification.symbol_table.new_field($3.dd_matched_text, $2.dd_matched_text, "", $3.dd_location); 172 | } 173 | !} 174 | | "%field" field_type field_name field_conversion_function 175 | !{ 176 | if (grammar_specification.symbol_table.is_known_field($3.dd_matched_text)) { 177 | auto previous = grammar_specification.symbol_table.get_field_defined_at($3.dd_matched_text); 178 | error($3.dd_location, "\"%s\" already declared at line %s.", $3.dd_matched_text, previous.line_number); 179 | } else { 180 | grammar_specification.symbol_table.new_field($3.dd_matched_text, $2.dd_matched_text, $4.dd_matched_text, $3.dd_location); 181 | } 182 | !} 183 | . 184 | 185 | field_type: IDENT ?( !is_allowable_name($1.dd_matched_text) ?) 186 | !{ 187 | warning($1.dd_location, "field type name \"%s\" may clash with generated code", $1.dd_matched_text); 188 | !} 189 | | IDENT 190 | . 191 | 192 | field_name: IDENT ?( !is_allowable_name($1.dd_matched_text) ?) 193 | !{ 194 | warning($1.dd_location, "field name \"%s\" may clash with generated code", $1.dd_matched_text); 195 | !} 196 | | IDENT 197 | . 198 | 199 | field_conversion_function: IDENT ?( !is_allowable_name($1.dd_matched_text) ?) 200 | !{ 201 | warning($1.dd_location, "field conversion function name \"%s\" may clash with generated code", $1.dd_matched_text); 202 | !} 203 | | IDENT 204 | . 205 | 206 | // Token definitions 207 | token_definitions : oinjection token_definition 208 | | token_definitions oinjection token_definition oinjection 209 | .
210 | 211 | token_definition: "%token" new_token_name pattern 212 | !{ 213 | if (grammar_specification.symbol_table.is_known_symbol($2.dd_matched_text)) { 214 | auto previous = grammar_specification.symbol_table.get_declaration_point($2.dd_matched_text); 215 | error($2.dd_location, "\"%s\" already declared at line %s.", $2.dd_matched_text, previous.line_number); 216 | } else { 217 | grammar_specification.symbol_table.new_token($2.dd_matched_text, $3.dd_matched_text, $2.dd_location); 218 | } 219 | !} 220 | | "%token" FIELDNAME new_token_name pattern 221 | !{ 222 | auto field_name = $2.dd_matched_text[1..$ - 1]; 223 | if (grammar_specification.symbol_table.is_known_symbol($3.dd_matched_text)) { 224 | auto previous = grammar_specification.symbol_table.get_declaration_point($3.dd_matched_text); 225 | error($3.dd_location, "\"%s\" already declared at line %s.", $3.dd_matched_text, previous.line_number); 226 | } else if (!grammar_specification.symbol_table.is_known_field(field_name)) { 227 | error($2.dd_location, "field name \"%s\" is not known.", field_name); 228 | grammar_specification.symbol_table.new_token($3.dd_matched_text, $4.dd_matched_text, $3.dd_location); 229 | } else { 230 | grammar_specification.symbol_table.new_token($3.dd_matched_text, $4.dd_matched_text, $3.dd_location, field_name); 231 | } 232 | !} 233 | . 234 | 235 | new_token_name: IDENT ?( !is_allowable_name($1.dd_matched_text) ?) 236 | !{ 237 | warning($1.dd_location, "token name \"%s\" may clash with generated code", $1.dd_matched_text); 238 | !} 239 | | IDENT 240 | . 241 | 242 | pattern: REGEX 243 | | LITERAL 244 | . 245 | 246 | // Skip definitions 247 | skip_definitions : // empty production 248 | !{ 249 | // do nothing 250 | !} 251 | | skip_definitions oinjection skip_definition oinjection 252 | . 253 | 254 | skip_definition: "%skip" REGEX 255 | !{ 256 | grammar_specification.symbol_table.add_skip_rule($2.dd_matched_text); 257 | !} 258 | . 259 | 260 | // Precedence definitions 261 | precedence_definitions : // empty production 262 | !{ 263 | // do nothing 264 | !} 265 | | precedence_definitions oinjection precedence_definition oinjection 266 | . 267 | 268 | precedence_definition: "%left" tag_list 269 | !{ 270 | grammar_specification.symbol_table.set_precedences(Associativity.left, $2.symbol_list); 271 | !} 272 | | "%right" tag_list 273 | !{ 274 | grammar_specification.symbol_table.set_precedences(Associativity.right, $2.symbol_list); 275 | !} 276 | | "%nonassoc" tag_list 277 | !{ 278 | grammar_specification.symbol_table.set_precedences(Associativity.nonassoc, $2.symbol_list); 279 | !} 280 | . 281 | 282 | tag_list: tag 283 | !{ 284 | if ($1.symbol is null) { 285 | $$.symbol_list = []; 286 | } else { 287 | $$.symbol_list = [$1.symbol]; 288 | } 289 | !} 290 | | tag_list tag 291 | !{ 292 | if ($2.symbol is null) { 293 | $$.symbol_list = $1.symbol_list; 294 | } else { 295 | $$.symbol_list = $1.symbol_list ~ $2.symbol; 296 | } 297 | !} 298 | . 299 | 300 | tag: LITERAL 301 | !{ 302 | $$.symbol = grammar_specification.symbol_table.get_literal_token($1.dd_matched_text, $1.dd_location); 303 | if ($$.symbol is null) { 304 | error($1.dd_location, "Literal \"%s\" is not known.", $1.dd_matched_text); 305 | } 306 | !} 307 | | IDENT ?( grammar_specification.symbol_table.is_known_token($1.dd_matched_text) ?) 308 | !{ 309 | $$.symbol = grammar_specification.symbol_table.get_symbol($1.dd_matched_text, $1.dd_location); 310 | !} 311 | | IDENT ?( grammar_specification.symbol_table.is_known_non_terminal($1.dd_matched_text) ?)
312 | !{ 313 | $$.symbol = null; 314 | error($1.dd_location, "Non terminal \"%s\" cannot be used as a precedence tag.", $1.dd_matched_text); 315 | !} 316 | | IDENT 317 | !{ 318 | $$.symbol = grammar_specification.symbol_table.new_tag($1.dd_matched_text, $1.dd_location); 319 | !} 320 | . 321 | 322 | // Production rules 323 | production_rules: oinjection production_group oinjection 324 | | production_rules production_group oinjection 325 | . 326 | 327 | production_group: production_group_head production_tail_list "." 328 | !{ 329 | foreach (production_tail; $2.production_tail_list) { 330 | grammar_specification.new_production(dd_args[1 - 1].symbol, production_tail.right_hand_side, production_tail.predicate, production_tail.action, production_tail.associative_precedence); 331 | } 332 | !} 333 | . 334 | 335 | production_group_head: IDENT ":" ?( grammar_specification.symbol_table.is_known_token($1.dd_matched_text) ?) 336 | !{ 337 | auto lineNo = grammar_specification.symbol_table.get_declaration_point($1.dd_matched_text).line_number; 338 | error($1.dd_location, "%s: token (defined at line %s) cannot be used as left hand side", $1.dd_matched_text, lineNo); 339 | $$.symbol = grammar_specification.symbol_table.get_symbol($1.dd_matched_text, $1.dd_location); 340 | !} 341 | | IDENT ":" ?( grammar_specification.symbol_table.is_known_tag($1.dd_matched_text) ?) 342 | !{ 343 | auto lineNo = grammar_specification.symbol_table.get_declaration_point($1.dd_matched_text).line_number; 344 | error($1.dd_location, "%s: precedence tag (defined at line %s) cannot be used as left hand side", $1.dd_matched_text, lineNo); 345 | $$.symbol = grammar_specification.symbol_table.get_symbol($1.dd_matched_text, $1.dd_location); 346 | !} 347 | | IDENT ":" 348 | !{ 349 | if (!is_allowable_name($1.dd_matched_text)) { 350 | warning($1.dd_location, "non terminal symbol name \"%s\" may clash with generated code", $1.dd_matched_text); 351 | } 352 | $$.symbol = grammar_specification.symbol_table.define_non_terminal($1.dd_matched_text, $1.dd_location); 353 | !} 354 | . 355 | 356 | production_tail_list: production_tail 357 | !{ 358 | $$.production_tail_list = [$1.production_tail]; 359 | !} 360 | | production_tail_list "|" production_tail 361 | !{ 362 | $$.production_tail_list = $1.production_tail_list ~ $3.production_tail; 363 | !} 364 | .
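[Editor's note] The `production_group` action above shows how alternatives are flattened: a rule with n `|`-separated tails yields n separate productions sharing one left hand side, one `new_production()` call per element of `$2.production_tail_list`. A toy D sketch of that flattening (the `Tail` struct is a stand-in for `ProductionTail`):
```
import std.stdio;

struct Tail { string[] rhs; }

void main()
{
    // expr: expr "+" expr | expr "-" expr .  -- one LHS, two tails
    string lhs = "expr";
    auto tails = [Tail(["expr", `"+"`, "expr"]), Tail(["expr", `"-"`, "expr"])];
    // The production_group action performs the analogue of this loop,
    // calling grammar_specification.new_production() once per tail
    foreach (i, tail; tails) {
        writefln("production %s: %s: %-(%s %)", i + 1, lhs, tail.rhs);
    }
}
```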
365 | 366 | production_tail: 367 | !{ 368 | $$.production_tail = ProductionTail([], AssociativePrecedence(), null, null); 369 | !} 370 | | action 371 | !{ 372 | $$.production_tail = ProductionTail([], AssociativePrecedence(), null, $1.semantic_action); 373 | !} 374 | | predicate action 375 | !{ 376 | $$.production_tail = ProductionTail([], AssociativePrecedence(), $1.predicate, $2.semantic_action); 377 | !} 378 | | predicate 379 | !{ 380 | $$.production_tail = ProductionTail([], AssociativePrecedence(), $1.predicate, null); 381 | !} 382 | | symbol_list predicate tagged_precedence action 383 | !{ 384 | $$.production_tail = ProductionTail($1.symbol_list, $3.associative_precedence, $2.predicate, $4.semantic_action); 385 | !} 386 | | symbol_list predicate tagged_precedence 387 | !{ 388 | $$.production_tail = ProductionTail($1.symbol_list, $3.associative_precedence, $2.predicate, null); 389 | !} 390 | | symbol_list predicate action 391 | !{ 392 | $$.production_tail = ProductionTail($1.symbol_list, AssociativePrecedence(), $2.predicate, $3.semantic_action); 393 | !} 394 | | symbol_list predicate 395 | !{ 396 | $$.production_tail = ProductionTail($1.symbol_list, AssociativePrecedence(), $2.predicate, null); 397 | !} 398 | | symbol_list tagged_precedence action 399 | !{ 400 | $$.production_tail = ProductionTail($1.symbol_list, $2.associative_precedence, null, $3.semantic_action); 401 | !} 402 | | symbol_list tagged_precedence 403 | !{ 404 | $$.production_tail = ProductionTail($1.symbol_list, $2.associative_precedence); 405 | !} 406 | | symbol_list action 407 | !{ 408 | $$.production_tail = ProductionTail($1.symbol_list, AssociativePrecedence(), null, $2.semantic_action); 409 | !} 410 | | symbol_list 411 | !{ 412 | $$.production_tail = ProductionTail($1.symbol_list); 413 | !} 414 | . 415 | 416 | action: ACTION 417 | !{ 418 | $$.semantic_action = $1.dd_matched_text[2..$ - 2]; 419 | !} 420 | . 421 | 422 | predicate: PREDICATE 423 | !{ 424 | $$.predicate = $1.dd_matched_text[2..$ - 2]; 425 | !} 426 | . 427 | 428 | tagged_precedence: "%prec" IDENT 429 | !{ 430 | auto symbol = grammar_specification.symbol_table.get_symbol($2.dd_matched_text, $2.dd_location, false); 431 | if (symbol is null) { 432 | error($2.dd_location, "%s: Unknown symbol.", $2.dd_matched_text); 433 | $$.associative_precedence = AssociativePrecedence(); 434 | } else if (symbol.type == SymbolType.non_terminal) { 435 | error($2.dd_location, "%s: Illegal precedence tag (must be Token or Tag).", $2.dd_matched_text); 436 | $$.associative_precedence = AssociativePrecedence(); 437 | } else { 438 | $$.associative_precedence = symbol.associative_precedence; 439 | } 440 | !} 441 | | "%prec" LITERAL 442 | !{ 443 | auto symbol = grammar_specification.symbol_table.get_literal_token($2.dd_matched_text, $2.dd_location); 444 | if (symbol is null) { 445 | $$.associative_precedence = AssociativePrecedence(); 446 | error($2.dd_location, "%s: Unknown literal token.", $2.dd_matched_text); 447 | } else { 448 | $$.associative_precedence = symbol.associative_precedence; 449 | } 450 | !} 451 | . 452 | 453 | symbol_list: symbol 454 | !{ 455 | $$.symbol_list = [$1.symbol]; 456 | !} 457 | | symbol_list symbol 458 | !{ 459 | $$.symbol_list = $1.symbol_list ~ $2.symbol; 460 | !} 461 | . 
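[Editor's note] All of the `$$`, `$1` ... `$#` notation in these tails is resolved textually, not by the parser: `expanded_semantic_action()` and `expanded_predicate()` in grammar.d (later in this listing) rewrite it with std.regex before the code is emitted. A stand-alone demonstration of the same substitutions:
```
import std.format : format;
import std.regex;
import std.stdio;

void main()
{
    // In semantic actions: $$ -> dd_lhs, $n -> dd_args[n - 1]
    auto action = `$$.value = $1.value + $3.value;`
        .replaceAll(regex(`\$\$`), "dd_lhs")
        .replaceAll(regex(`\$(\d+)`), "dd_args[$1 - 1]");
    writeln(action); // dd_lhs.value = dd_args[1 - 1].value + dd_args[3 - 1].value;

    // In predicates: $n indexes the attribute stack relative to its top
    // ($# becomes dd_next_token).  For a right hand side of length 3:
    auto repl = format("dd_attribute_stack[$$ - %s + $1]", 3 + 1);
    writeln(`$1.value == 0`.replaceAll(regex(`\$(\d+)`), repl));
    // dd_attribute_stack[$ - 4 + 1].value == 0
}
```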
462 | 463 | symbol: IDENT 464 | !{ 465 | $$.symbol = grammar_specification.symbol_table.get_symbol($1.dd_matched_text, $1.dd_location, true); 466 | !} 467 | | LITERAL 468 | !{ 469 | $$.symbol = grammar_specification.symbol_table.get_literal_token($1.dd_matched_text, $1.dd_location); 470 | if ($$.symbol is null) { 471 | error($1.dd_location, "%s: unknown literal token", $1.dd_matched_text); 472 | } 473 | !} 474 | | "%error" 475 | !{ 476 | $$.symbol = grammar_specification.symbol_table.get_special_symbol(SpecialSymbols.parse_error); 477 | !} 478 | . 479 | 480 | %{ 481 | if (!dd_parse_text(text, label)) return null; 482 | return grammar_specification; 483 | } 484 | %} 485 | -------------------------------------------------------------------------------- /errors.d: -------------------------------------------------------------------------------- 1 | // errors.d 2 | // 3 | // Copyright Peter Williams 2013 . 4 | // 5 | // Distributed under the Boost Software License, Version 1.0. 6 | // (See accompanying file LICENSE_1_0.txt or copy at 7 | // http://www.boost.org/LICENSE_1_0.txt) 8 | 9 | // Make exception's messages usable for error messages 10 | 11 | import std.string; 12 | import std.file; 13 | 14 | string extract_file_exception_msg(FileException e) 15 | { 16 | auto firstLine = splitLines(format("%s", e))[0]; 17 | return firstLine[indexOf(firstLine, ": ") + 2..$]; 18 | } 19 | -------------------------------------------------------------------------------- /examples/calc/Makefile: -------------------------------------------------------------------------------- 1 | calc: calc.d parser.d 2 | dmd -I../.. calc.d parser.d ../../ddlib/lexan.d ../../ddlib/templates.d 3 | 4 | parser.d: parser.ddgs ../../ddpg 5 | ../../ddpg -f --states=parser.states --expect=1 parser.ddgs 6 | 7 | test: calc test_data 8 | ./calc test_data 9 | -------------------------------------------------------------------------------- /examples/calc/calc.d: -------------------------------------------------------------------------------- 1 | #!/usr/bin/rdmd 2 | // calc.d 3 | // 4 | // Copyright Peter Williams 2013 . 5 | // 6 | // Distributed under the Boost Software License, Version 1.0. 
7 | // (See accompanying file LICENSE_1_0.txt or copy at 8 | // http://www.boost.org/LICENSE_1_0.txt) 9 | 10 | import std.stdio; 11 | import std.file; 12 | 13 | import parser; 14 | 15 | int main(string[] args) 16 | { 17 | auto text = readText(args[1]); 18 | writeln(text); 19 | dd_parse_text(text); 20 | 21 | return 0; 22 | } 23 | -------------------------------------------------------------------------------- /examples/calc/parser.ddgs: 1 | %{ 2 | import std.stdio; 3 | double[string] variables; 4 | enum Errors { undefinedVariables = 1, divideByZero = 2, syntaxError = 4, lexError = 8 }; 5 | uint errors; 6 | 7 | void report_errors() 8 | { 9 | auto report = "Errors:"; 10 | if (errors & Errors.undefinedVariables) { 11 | report ~= " \"Undefined Variables\""; 12 | } 13 | if (errors & Errors.divideByZero) { 14 | report ~= " \"Divide by Zero\""; 15 | } 16 | if (errors & Errors.syntaxError) { 17 | report ~= " \"Syntax Errors\""; 18 | } 19 | if (errors & Errors.lexError) { 20 | report ~= " \"Lexical Errors\""; 21 | } 22 | stderr.writeln(report); 23 | } 24 | %} 25 | 26 | %field double value 27 | %field string id 28 | 29 | %token EOL (\n) 30 | %token PLUS "+" 31 | %token MINUS "-" 32 | %token TIMES "*" 33 | %token DIVIDE "/" 34 | %token ASSIGN "=" 35 | %token NUMBER ([0-9]+(\.[0-9]+){0,1}) 36 | %token ID ([a-zA-Z]+) 37 | %token LPR "(" 38 | %token RPR ")" 39 | 40 | %skip ([\t\r ]+) 41 | 42 | %right UMINUS 43 | %left "*" "/" 44 | %left "+" "-" 45 | %left EOL 46 | 47 | %% 48 | line: setup expr ?(errors > 0?) !{report_errors();!} 49 | | setup expr !{writeln($2.value);!} 50 | | setup ID "=" expr ?(errors == 0?) !{variables[$2.id] = $4.value;!} 51 | | setup ID "=" expr !{report_errors();!} 52 | | line EOL line 53 | | line EOL 54 | | %error !{errors |= Errors.syntaxError;!} 55 | . 56 | 57 | setup: !{errors = 0;!}. 58 | 59 | expr: expr "+" expr ?($1.value == 0?) !{$$.value = $3.value;!} 60 | | expr "+" expr ?($3.value == 0?) !{$$.value = $1.value;!} 61 | | expr "+" expr !{$$.value = $1.value + $3.value;!} 62 | | expr "-" expr ?($1.value == 0?) !{$$.value = -$3.value;!} 63 | | expr "-" expr ?($3.value == 0?) !{$$.value = $1.value;!} 64 | | expr "-" expr !{$$.value = $1.value - $3.value;!} 65 | | expr "*" expr ?($1.value == 0 || $3.value == 0?) !{$$.value = 0;!} 66 | | expr "*" expr ?($1.value == 1?) !{$$.value = $3.value;!} 67 | | expr "*" expr ?($3.value == 1?) !{$$.value = $1.value;!} 68 | | expr "*" expr !{$$.value = $1.value * $3.value;!} 69 | | expr "/" expr ?($3.value == 1?) !{$$.value = $1.value;!} 70 | | expr "/" expr ?($3.value == 0?) !{errors |= Errors.divideByZero;!} 71 | | expr "/" expr ?($1.value == 0?) !{$$.value = 0;!} 72 | | expr "/" expr !{$$.value = $1.value / $3.value;!} 73 | | "(" expr ")" !{$$.value = $2.value;!} 74 | | "-" expr %prec UMINUS !{$$.value = -$2.value;!} 75 | | NUMBER !{$$.value = $1.value;!} 76 | | ID ?($1.id in variables?) !{$$.value = variables[$1.id];!} 77 | | ID !{errors |= Errors.undefinedVariables; $$.value = 0;!} 78 | . 79 | -------------------------------------------------------------------------------- /examples/calc/test_data: 1 | a = 6.33 * 5 2 | b = a + (7 - 9.3) 3 | a / b 4 | c 5 | c = 8 6 | c - a 7 | ???
8 | d = 7 + 8 9 | d 10 | -------------------------------------------------------------------------------- /examples/calc2/Makefile: 1 | calc: calc.d parser.d 2 | dmd -I../.. calc.d parser.d ../../ddlib/lexan.d ../../ddlib/templates.d 3 | 4 | parser.d: parser.ddgs ../../ddpg 5 | ../../ddpg -f --states=parser.states --expect=1 parser.ddgs 6 | 7 | test: calc test_data 8 | ./calc test_data 9 | -------------------------------------------------------------------------------- /examples/calc2/calc.d: 1 | #!/usr/bin/rdmd 2 | // calc.d 3 | // 4 | // Copyright Peter Williams 2013 . 5 | // 6 | // Distributed under the Boost Software License, Version 1.0. 7 | // (See accompanying file LICENSE_1_0.txt or copy at 8 | // http://www.boost.org/LICENSE_1_0.txt) 9 | 10 | import std.stdio; 11 | import std.file; 12 | 13 | import parser; 14 | 15 | int main(string[] args) 16 | { 17 | auto text = readText(args[1]); 18 | writeln(text); 19 | dd_parse_text(text); 20 | 21 | return 0; 22 | } 23 | -------------------------------------------------------------------------------- /examples/calc2/parser.ddgs: 1 | %{ 2 | import std.stdio; 3 | double[string] variables; 4 | enum Errors { undefinedVariables = 1, divideByZero = 2, syntaxError = 4, lexError = 8 }; 5 | uint errors; 6 | 7 | void report_errors() 8 | { 9 | auto report = "Errors:"; 10 | if (errors & Errors.undefinedVariables) { 11 | report ~= " \"Undefined Variables\""; 12 | } 13 | if (errors & Errors.divideByZero) { 14 | report ~= " \"Divide by Zero\""; 15 | } 16 | if (errors & Errors.syntaxError) { 17 | report ~= " \"Syntax Errors\""; 18 | } 19 | if (errors & Errors.lexError) { 20 | report ~= " \"Lexical Errors\""; 21 | } 22 | stderr.writeln(report); 23 | } 24 | %} 25 | 26 | %field double value 27 | %field string id 28 | 29 | %token EOL (\n) 30 | %token PLUS "+" 31 | %token MINUS "-" 32 | %token TIMES "*" 33 | %token DIVIDE "/" 34 | %token ASSIGN "=" 35 | %token NUMBER ([0-9]+(\.[0-9]+){0,1}) 36 | %token ID ([a-zA-Z]+) 37 | %token LPR "(" 38 | %token RPR ")" 39 | 40 | %skip ([\t\r ]+) 41 | 42 | %right UMINUS 43 | %left "*" "/" 44 | %left "+" "-" 45 | %left EOL 46 | 47 | %% 48 | line: setup expr !{writeln($2.value);!} 49 | | setup ID "=" expr !{variables[$2.id] = $4.value;!} 50 | | line EOL line 51 | | line EOL 52 | | %error !{writeln("EL: ", $1.dd_syntax_error_data);!} 53 | . 54 | 55 | setup: !{errors = 0;!}. 56 | 57 | expr: expr "+" expr ?($1.value == 0?) !{$$.value = $3.value;!} 58 | | expr "+" expr ?($3.value == 0?) !{$$.value = $1.value;!} 59 | | expr "+" expr !{$$.value = $1.value + $3.value;!} 60 | | expr "-" expr ?($1.value == 0?) !{$$.value = -$3.value;!} 61 | | expr "-" expr ?($3.value == 0?) !{$$.value = $1.value;!} 62 | | expr "-" expr !{$$.value = $1.value - $3.value;!} 63 | | expr "*" expr ?($1.value == 0 || $3.value == 0?) !{$$.value = 0;!} 64 | | expr "*" expr ?($1.value == 1?) !{$$.value = $3.value;!} 65 | | expr "*" expr ?($3.value == 1?) !{$$.value = $1.value;!} 66 | | expr "*" expr !{$$.value = $1.value * $3.value;!} 67 | | expr "/" expr ?($3.value == 1?) !{$$.value = $1.value;!} 68 | | expr "/" expr ?($3.value == 0?) !{$$.value = 0; writeln("Divide by zero!!");!} 69 | | expr "/" expr ?($1.value == 0?)
!{$$.value = 0;!} 70 | | expr "/" expr !{$$.value = $1.value / $3.value;!} 71 | | "(" expr ")" !{$$.value = $2.value;!} 72 | | "-" expr %prec UMINUS !{$$.value = -$2.value;!} 73 | | NUMBER !{$$.value = $1.value;!} 74 | | ID ?($1.id in variables?) !{$$.value = variables[$1.id];!} 75 | | ID !{writefln("%s: undefined variable", $1.id); $$.value = 0;!} 76 | | %error !{writeln("EE: ", $1.dd_syntax_error_data);!} 77 | . 78 | -------------------------------------------------------------------------------- /examples/calc2/test_data: 1 | a = 6.33 * 5 2 | b = a + (7 - 9.3) 3 | a / b 4 | c 5 | c = 8 6 | c - a 7 | ??? 8 | d = 7 + 8 9 | d 10 | -------------------------------------------------------------------------------- /examples/evaluater/Makefile: 1 | calc: evaluator.d parser.d 2 | dmd -ofcalc -I../.. evaluator.d parser.d ../../ddlib/lexan.d ../../ddlib/templates.d 3 | 4 | parser.d: parser.ddgs ../../ddpg 5 | ../../ddpg -f parser.ddgs 6 | -------------------------------------------------------------------------------- /examples/evaluater/evaluator.d: 1 | #!/usr/bin/rdmd 2 | // evaluator.d 3 | // 4 | // Copyright Peter Williams 2013 . 5 | // 6 | // Distributed under the Boost Software License, Version 1.0. 7 | // (See accompanying file LICENSE_1_0.txt or copy at 8 | // http://www.boost.org/LICENSE_1_0.txt) 9 | 10 | import std.stdio; 11 | import std.file; 12 | import std.string; 13 | import parser; 14 | 15 | int main(string[] args) 16 | { 17 | auto text = readText(args[1]); 18 | foreach (line; splitLines(text)) { 19 | writefln("Evaluate: %s", line); 20 | writefln("Yields: %s", evaluate(line)); 21 | } 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /examples/evaluater/parser.ddgs: 1 | // parser.ddgs 2 | // 3 | // Copyright Peter Williams 2013 . 4 | // 5 | // Distributed under the Boost Software License, Version 1.0. 6 | // (See accompanying file LICENSE_1_0.txt or copy at 7 | // http://www.boost.org/LICENSE_1_0.txt) 8 | %{ 9 | // parser.d 10 | // 11 | // Copyright Peter Williams 2013 . 12 | // 13 | // Distributed under the Boost Software License, Version 1.0. 14 | // (See accompanying file LICENSE_1_0.txt or copy at 15 | // http://www.boost.org/LICENSE_1_0.txt) 16 | 17 | import std.stdio; 18 | 19 | %} 20 | 21 | %{ 22 | double evaluate(string text) 23 | { 24 | double result; // left default-initialised, i.e. NaN 25 | %} 26 | 27 | %field double value 28 | 29 | %token PLUS "+" 30 | %token MINUS "-" 31 | %token TIMES "*" 32 | %token DIVIDE "/" 33 | %token NUMBER ([0-9]+(\.[0-9]+){0,1}) 34 | %token LPR "(" 35 | %token RPR ")" 36 | %token STRING ("(\\"|[^"])*") 37 | %token BOGUS "uv" 38 | 39 | %skip (\s+) 40 | 41 | %right UMINUS 42 | %left "*" "/" 43 | %left "+" "-" 44 | 45 | %% 46 | %inject "/dev/null". 47 | 48 | equation: expr !{result = $1.value;!} .
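[Editor's note] The two `%{ ... %}` blocks at the top of this file are the header and the preamble: the preamble opens the `evaluate()` function, and the coda at the bottom of the file closes it, so the entire generated parser ends up nested inside that function. Assuming parser.d has been built as the Makefile shows, the caller's side (cf. evaluator.d above) then behaves roughly like this; note that `result` keeps D's default `double` initialisation, NaN, whenever the `equation:` action never fires:
```
import std.stdio;
import parser;   // the module ddpg generates from this specification

void main()
{
    writeln(evaluate("2 + 3"));           // 5
    writeln(evaluate("(6.9 + 8) / 3.1")); // 4.80645
    writeln(evaluate("(2.8 + 6"));        // nan: the syntax error means
                                          // result is never assigned
}
```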
49 | 50 | expr: expr "+" expr !{$$.value = $1.value + $3.value;!} 51 | | expr "-" expr !{$$.value = $1.value - $3.value;!} 52 | | expr "*" expr !{$$.value = $1.value * $3.value;!} 53 | | expr "/" expr !{$$.value = $1.value / $3.value;!} 54 | | "(" expr ")" !{$$.value = $2.value;!} 55 | | "-" expr %prec UMINUS !{$$.value = -$2.value;!} 56 | | NUMBER !{$$.value = $1.value;!} 57 | | STRING !{ writeln("string: ", $1.dd_matched_text );!} 58 | | STRING STRING !{ writeln("string 1: ", $1.dd_matched_text, "; string 2: ", $2.dd_matched_text );!} 59 | | BOGUS !{!} 60 | . 61 | 62 | %{ 63 | dd_parse_text(text); 64 | return result; 65 | } 66 | %} 67 | -------------------------------------------------------------------------------- /examples/evaluater/test_input: 1 | 2 + 3 2 | (6.9 + 8) / 3.1 3 | (2.8 + 6 4 | "a string \"name\" " 5 | "one" "two" 6 | -------------------------------------------------------------------------------- /examples/test_inject/Makefile: 1 | calc: calc.d parser.d 2 | dmd -I../.. calc.d parser.d ../../ddlib/lexan.d ../../ddlib/templates.d 3 | 4 | parser.d: parser.ddgs *.ddgsi ../../ddpg 5 | ../../ddpg -f --states=parser.states --expect=1 parser.ddgs 6 | 7 | test: calc test_data 8 | ./calc test_data 9 | -------------------------------------------------------------------------------- /examples/test_inject/calc.d: 1 | #!/usr/bin/rdmd 2 | // calc.d 3 | // 4 | // Copyright Peter Williams 2013 . 5 | // 6 | // Distributed under the Boost Software License, Version 1.0. 7 | // (See accompanying file LICENSE_1_0.txt or copy at 8 | // http://www.boost.org/LICENSE_1_0.txt) 9 | 10 | import std.stdio; 11 | import std.file; 12 | 13 | import parser; 14 | 15 | int main(string[] args) 16 | { 17 | auto text = readText(args[1]); 18 | writeln(text); 19 | dd_parse_text(text); 20 | 21 | return 0; 22 | } 23 | -------------------------------------------------------------------------------- /examples/test_inject/expr.ddgsi: 1 | 2 | expr: expr "+" expr ?($1.value == 0?) !{$$.value = $3.value;!} 3 | | expr "+" expr ?($3.value == 0?) !{$$.value = $1.value;!} 4 | | expr "+" expr !{$$.value = $1.value + $3.value;!} 5 | | expr "-" expr ?($1.value == 0?) !{$$.value = -$3.value;!} 6 | | expr "-" expr ?($3.value == 0?) !{$$.value = $1.value;!} 7 | | expr "-" expr !{$$.value = $1.value - $3.value;!} 8 | | expr "*" expr ?($1.value == 0 || $3.value == 0?) !{$$.value = 0;!} 9 | | expr "*" expr ?($1.value == 1?) !{$$.value = $3.value;!} 10 | | expr "*" expr ?($3.value == 1?) !{$$.value = $1.value;!} 11 | | expr "*" expr !{$$.value = $1.value * $3.value;!} 12 | | expr "/" expr ?($3.value == 1?) !{$$.value = $1.value;!} 13 | | expr "/" expr ?($3.value == 0?) !{$$.value = 0; writeln("Divide by zero!!");!} 14 | | expr "/" expr ?($1.value == 0?) !{$$.value = 0;!} 15 | | expr "/" expr !{$$.value = $1.value / $3.value;!} 16 | | "(" expr ")" !{$$.value = $2.value;!} 17 | | "-" expr %prec UMINUS !{$$.value = -$2.value;!} 18 | | NUMBER !{$$.value = $1.value;!} 19 | | ID ?($1.id in variables?) !{$$.value = variables[$1.id];!} 20 | | ID !{writefln("%s: undefined variable", $1.id); $$.value = 0;!} 21 | | %error !{writeln("EE: ", $1.dd_syntax_error_data);!} 22 | .
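[Editor's note] Like line.ddgsi, expr.ddgsi ends with an `%error` alternative: when the generated parser recovers from a syntax error it reduces that production and the error details arrive in the `dd_syntax_error_data` attribute. Judging from `generate_attributes_code_text()` in grammar.d further down this listing, the attributes struct generated for this example's two `%field` declarations has roughly the shape sketched below; the `DDCharLocation` and `DDSyntaxErrorData` stand-ins are guesses, the real definitions living in ddlib/templates.d:
```
// Stand-ins for the real types supplied by ddlib/templates.d (guessed)
struct DDCharLocation { size_t line_number, offset; }
class DDSyntaxErrorData {}

// Approximate shape of the generated DDAttributes for "%field double value"
// and "%field string id"; the union means an attribute carries whichever
// field its token's %field declaration (or an %error reduction) selects
struct DDAttributes {
    DDCharLocation dd_location;
    string dd_matched_text;
    union {
        DDSyntaxErrorData dd_syntax_error_data;
        double value;
        string id;
    }
}
```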
23 | -------------------------------------------------------------------------------- /examples/test_inject/fields.ddgsi: -------------------------------------------------------------------------------- 1 | 2 | %field double value 3 | %field string id 4 | -------------------------------------------------------------------------------- /examples/test_inject/header.ddgsi: -------------------------------------------------------------------------------- 1 | %{ 2 | import std.stdio; 3 | double[string] variables; 4 | enum Errors { undefinedVariables = 1, divideByZero = 2, syntaxError = 4, lexError = 8 }; 5 | uint errors; 6 | 7 | void report_errors() 8 | { 9 | auto report = "Errors:"; 10 | if (errors & Errors.undefinedVariables) { 11 | report ~= " \"Undefined Variables\""; 12 | } 13 | if (errors & Errors.divideByZero) { 14 | report ~= " \"Divide by Zero\""; 15 | } 16 | if (errors & Errors.syntaxError) { 17 | report ~= " \"Syntax Errors\""; 18 | } 19 | if (errors & Errors.lexError) { 20 | report ~= " \"Lexical Errors\""; 21 | } 22 | stderr.writeln(report); 23 | } 24 | %} 25 | -------------------------------------------------------------------------------- /examples/test_inject/line.ddgsi: -------------------------------------------------------------------------------- 1 | line: setup expr !{writeln($2.value);!} 2 | | setup ID "=" expr !{variables[$2.id] = $4.value;!} 3 | | line EOL line 4 | | line EOL 5 | | %error !{writeln("EL: ", $1.dd_syntax_error_data);!} 6 | . 7 | -------------------------------------------------------------------------------- /examples/test_inject/parser.ddgs: -------------------------------------------------------------------------------- 1 | %inject "header.ddgsi". 2 | 3 | %inject "fields.ddgsi". 4 | 5 | %inject "tokens.ddgsi". 6 | 7 | %inject "skips.ddgsi". 8 | 9 | %inject "precs.ddgsi". 10 | 11 | %% 12 | %inject "line.ddgsi". 13 | 14 | setup: !{errors = 0;!}. 15 | 16 | %inject "expr.ddgsi". 17 | -------------------------------------------------------------------------------- /examples/test_inject/precs.ddgsi: -------------------------------------------------------------------------------- 1 | 2 | %right UMINUS 3 | %left "*" "/" 4 | %left "+" "-" 5 | %left EOL 6 | -------------------------------------------------------------------------------- /examples/test_inject/skips.ddgsi: -------------------------------------------------------------------------------- 1 | %skip ([\t\r ]+) 2 | -------------------------------------------------------------------------------- /examples/test_inject/test_data: -------------------------------------------------------------------------------- 1 | a = 6.33 * 5 2 | b = a + (7 - 9.3) 3 | a / b 4 | c 5 | c = 8 6 | c - a 7 | ??? 8 | d = 7 + 8 9 | d 10 | -------------------------------------------------------------------------------- /examples/test_inject/tokens.ddgsi: -------------------------------------------------------------------------------- 1 | 2 | %token EOL (\n) 3 | %token PLUS "+" 4 | %token MINUS "-" 5 | %token TIMES "*" 6 | %token DIVIDE "/" 7 | %token ASSIGN "=" 8 | %token NUMBER ([0-9]+(\.[0-9]+){0,1}) 9 | %token ID ([a-zA-Z]+) 10 | %token LPR "(" 11 | %token RPR ")" 12 | -------------------------------------------------------------------------------- /grammar.d: -------------------------------------------------------------------------------- 1 | // grammar.d 2 | // 3 | // Copyright Peter Williams 2013 . 4 | // 5 | // Distributed under the Boost Software License, Version 1.0. 
6 | // (See accompanying file LICENSE_1_0.txt or copy at 7 | // http://www.boost.org/LICENSE_1_0.txt) 8 | 9 | module grammar; 10 | 11 | import std.string; 12 | import std.regex; 13 | import std.stdio; 14 | 15 | import symbols; 16 | import sets; 17 | import idnumber; 18 | 19 | import workarounds: wa_sort; 20 | 21 | alias uint ProductionId; 22 | alias string Predicate; 23 | alias string SemanticAction; 24 | 25 | class Production { 26 | mixin IdNumber!(ProductionId); 27 | NonTerminalSymbol left_hand_side; 28 | Symbol[] right_hand_side; 29 | Predicate predicate; 30 | SemanticAction action; 31 | AssociativePrecedence associative_precedence; 32 | 33 | this(ProductionId id, NonTerminalSymbol lhs, Symbol[] rhs, Predicate pred, SemanticAction actn, AssociativePrecedence aprec) 34 | { 35 | this.id = id; 36 | left_hand_side = lhs; 37 | right_hand_side = rhs; 38 | predicate = pred; 39 | action = actn; 40 | if (aprec.is_explicitly_set) { 41 | associative_precedence = aprec; 42 | } else { 43 | for (int i = cast(int) rhs.length - 1; i >= 0; i--) { 44 | auto symbol = rhs[i]; 45 | if (symbol.type == SymbolType.token && symbol.associative_precedence.is_explicitly_set) { 46 | // We only inherit precedence/associativity if it's been explicitly set for the token 47 | associative_precedence = symbol.associative_precedence; 48 | break; 49 | } 50 | } 51 | } 52 | } 53 | 54 | @property 55 | Associativity associativity() 56 | { 57 | return associative_precedence.associativity; 58 | } 59 | 60 | @property 61 | Precedence precedence() 62 | { 63 | return associative_precedence.precedence; 64 | } 65 | 66 | @property 67 | const size_t length() 68 | { 69 | return right_hand_side.length; 70 | } 71 | 72 | @property 73 | string expanded_predicate() 74 | { 75 | static auto stack_attr_re = regex(r"\$(\d+)", "g"); 76 | static auto next_token_re = regex(r"\$#", "g"); 77 | auto replaceWith = format("dd_attribute_stack[$$ - %s + $1]", right_hand_side.length + 1); 78 | return replace(replace(predicate, stack_attr_re, replaceWith), next_token_re, "dd_next_token"); 79 | } 80 | 81 | @property 82 | string[] expanded_semantic_action() 83 | { 84 | static auto lhs_re = regex(r"\$\$", "g"); 85 | static auto stack_attr_re = regex(r"\$(\d+)", "g"); 86 | size_t last_non_blank_line_index = 0; 87 | size_t first_non_blank_line_index = 0; 88 | auto non_blank_line_seen = false; 89 | string[] first_pass_lines; 90 | foreach (index, line; replace(replace(action, lhs_re, "dd_lhs"), stack_attr_re, "dd_args[$1 - 1]").splitLines) { 91 | string strippedLine = line.stripRight.detab(4); 92 | if (strippedLine.length > 0) { 93 | last_non_blank_line_index = index; 94 | if (!non_blank_line_seen) first_non_blank_line_index = index; 95 | non_blank_line_seen = true; 96 | } 97 | first_pass_lines ~= strippedLine; 98 | } 99 | auto required_indent = " "; 100 | string[] second_pass_lines; 101 | foreach (line; first_pass_lines[first_non_blank_line_index..last_non_blank_line_index + 1].outdent) { 102 | second_pass_lines ~= required_indent ~ line; 103 | } 104 | return second_pass_lines; 105 | } 106 | 107 | override string toString() 108 | { 109 | // This is just for use in generated code comments 110 | if (right_hand_side.length == 0) { 111 | return format("%s: ", left_hand_side.name); 112 | } 113 | auto str = format("%s:", left_hand_side.name); 114 | foreach (symbol; right_hand_side) { 115 | str ~= format(" %s", symbol); 116 | } 117 | if (predicate.length > 0) { 118 | str ~= format(" ?( %s ?)", predicate); 119 | } 120 | return str; 121 | } 122 | 123 | @property 124 | bool 
has_error_recovery_tail() 125 | { 126 | if (right_hand_side.length == 0) return false; 127 | auto last_symbol_id = right_hand_side[$ - 1].id; 128 | return last_symbol_id == SpecialSymbols.parse_error; 129 | } 130 | } 131 | 132 | struct GrammarItemKey { 133 | Production production; 134 | uint dot; 135 | invariant () { 136 | assert(dot <= production.length); 137 | } 138 | 139 | this(Production production, uint dot=0) 140 | { 141 | this.production = production; 142 | this.dot = dot; 143 | } 144 | 145 | GrammarItemKey clone_shifted() 146 | in { 147 | assert(dot < production.length); 148 | } 149 | out (result) { 150 | assert(result.dot == dot + 1); 151 | assert(result.production is production); 152 | } 153 | body { 154 | return GrammarItemKey(production, dot + 1); 155 | } 156 | 157 | @property 158 | bool is_reducible() 159 | { 160 | return dot == production.length; 161 | } 162 | 163 | @property 164 | bool is_kernel_item() 165 | { 166 | return dot > 0 || production.left_hand_side.id == SpecialSymbols.start; 167 | } 168 | 169 | @property 170 | bool is_closable() 171 | { 172 | return dot < production.length && production.right_hand_side[dot].type == SymbolType.non_terminal; 173 | } 174 | 175 | @property 176 | Symbol next_symbol() 177 | in { 178 | assert(dot < production.length); 179 | } 180 | body { 181 | return production.right_hand_side[dot]; 182 | } 183 | 184 | @property 185 | Symbol[] tail() 186 | in { 187 | assert(dot < production.length); 188 | } 189 | body { 190 | return production.right_hand_side[dot + 1..$]; 191 | } 192 | 193 | bool next_symbol_is(Symbol symbol) 194 | { 195 | return dot < production.length && production.right_hand_side[dot] == symbol; 196 | } 197 | 198 | hash_t toHash() const 199 | { 200 | return production.id * (dot + 1); 201 | } 202 | 203 | bool opEquals(const GrammarItemKey other) const 204 | { 205 | return production.id == other.production.id && dot == other.dot; 206 | } 207 | 208 | int opCmp(const GrammarItemKey other) const 209 | { 210 | if (production.id == other.production.id) { 211 | return dot - other.dot; 212 | } 213 | return production.id - other.production.id; 214 | } 215 | 216 | string toString() 217 | { 218 | with (production) { 219 | // This is just for use in debugging 220 | if (right_hand_side.length == 0) { 221 | return format("%s: . 
", left_hand_side.name); 222 | } 223 | auto str = format("%s:", left_hand_side.name); 224 | for (auto i = 0; i < right_hand_side.length; i++) { 225 | if (i == dot) { 226 | str ~= " ."; 227 | } 228 | str ~= format(" %s", right_hand_side[i]); 229 | } 230 | if (dot == right_hand_side.length) { 231 | str ~= " ."; 232 | } 233 | if (predicate.length > 0) { 234 | str ~= format(" ?( %s ?)", predicate); 235 | } 236 | return str; 237 | } 238 | } 239 | } 240 | 241 | alias Set!(TokenSymbol)[GrammarItemKey] GrammarItemSet; 242 | 243 | Set!GrammarItemKey get_kernel_keys(GrammarItemSet itemset) 244 | { 245 | auto key_set = Set!GrammarItemKey(); 246 | foreach (grammar_item_key; itemset.byKey()) { 247 | if (grammar_item_key.is_kernel_item) { 248 | key_set += grammar_item_key; 249 | } 250 | } 251 | return key_set; 252 | } 253 | 254 | Set!GrammarItemKey get_reducible_keys(GrammarItemSet itemset) 255 | { 256 | auto key_set = Set!GrammarItemKey(); 257 | foreach (grammar_item_key, look_ahead_set; itemset) { 258 | if (grammar_item_key.is_reducible) { 259 | key_set += grammar_item_key; 260 | } 261 | } 262 | return key_set; 263 | } 264 | 265 | GrammarItemSet generate_goto_kernel(GrammarItemSet itemset, Symbol symbol) 266 | { 267 | GrammarItemSet goto_kernel; 268 | foreach (grammar_item_key, look_ahead_set; itemset) { 269 | if (grammar_item_key.next_symbol_is(symbol)) { 270 | goto_kernel[grammar_item_key.clone_shifted()] = look_ahead_set.clone(); 271 | } 272 | } 273 | return goto_kernel; 274 | } 275 | 276 | enum ProcessedState { unprocessed, needs_reprocessing, processed }; 277 | 278 | struct ShiftReduceConflict { 279 | TokenSymbol shift_symbol; 280 | ParserState goto_state; 281 | GrammarItemKey reducible_item; 282 | Set!(TokenSymbol) look_ahead_set; 283 | } 284 | 285 | struct ReduceReduceConflict { 286 | GrammarItemKey[2] reducible_item; 287 | Set!(TokenSymbol) look_ahead_set_intersection; 288 | } 289 | 290 | bool trivially_true(Predicate predicate) 291 | { 292 | return strip(predicate).length == 0 || predicate == "true"; 293 | } 294 | 295 | string token_list_string(TokenSymbol[] tokens) 296 | { 297 | if (tokens.length == 0) return ""; 298 | string str = tokens[0].name; 299 | foreach (token; tokens[1..$]) { 300 | str ~= format(", %s", token.name); 301 | } 302 | return str; 303 | } 304 | 305 | alias uint ParserStateId; 306 | 307 | class ParserState { 308 | mixin IdNumber!(ParserStateId); 309 | GrammarItemSet grammar_items; 310 | ParserState[TokenSymbol] shift_list; 311 | ParserState[NonTerminalSymbol] goto_table; 312 | ParserState error_recovery_state; 313 | ProcessedState state; 314 | ShiftReduceConflict[] shift_reduce_conflicts; 315 | ReduceReduceConflict[] reduce_reduce_conflicts; 316 | 317 | this(ParserStateId id, GrammarItemSet kernel) 318 | { 319 | this.id = id; 320 | grammar_items = kernel; 321 | } 322 | 323 | size_t resolve_shift_reduce_conflicts() 324 | { 325 | // Do this in two stages to obviate problems modifying shift_list 326 | ShiftReduceConflict[] conflicts; 327 | foreach(shift_symbol, goto_state; shift_list) { 328 | foreach (item, look_ahead_set; grammar_items) { 329 | if (item.is_reducible && look_ahead_set.contains(shift_symbol)) { 330 | conflicts ~= ShiftReduceConflict(shift_symbol, goto_state, item, look_ahead_set.clone()); 331 | } 332 | } 333 | } 334 | shift_reduce_conflicts = []; 335 | foreach (conflict; conflicts) { 336 | with (conflict) { 337 | if (shift_symbol.precedence < reducible_item.production.precedence) { 338 | shift_list.remove(shift_symbol); 339 | } else if (shift_symbol.precedence 
> reducible_item.production.precedence) { 340 | grammar_items[reducible_item] -= shift_symbol; 341 | } else if (reducible_item.production.associativity == Associativity.left) { 342 | shift_list.remove(shift_symbol); 343 | } else if (reducible_item.production.has_error_recovery_tail) { 344 | grammar_items[reducible_item] -= shift_symbol; 345 | } else { 346 | // Default: resolve in favour of shift but mark as 347 | // unresolved giving the user the option of accepting 348 | // the resolution or not (down the track) 349 | grammar_items[reducible_item] -= shift_symbol; 350 | shift_reduce_conflicts ~= conflict; 351 | } 352 | } 353 | } 354 | return shift_reduce_conflicts.length; 355 | } 356 | 357 | size_t resolve_reduce_reduce_conflicts() 358 | { 359 | reduce_reduce_conflicts = []; 360 | auto reducible_key_set = grammar_items.get_reducible_keys(); 361 | if (reducible_key_set.cardinality < 2) return 0; 362 | 363 | auto keys = reducible_key_set.elements; 364 | for (auto i = 0; i < keys.length - 1; i++) { 365 | auto key1 = keys[i]; 366 | foreach (key2; keys[i + 1..$]) { 367 | assert(key1.production.id < key2.production.id); 368 | auto intersection = (grammar_items[key1] & grammar_items[key2]); 369 | if (intersection.cardinality > 0 && trivially_true(key1.production.predicate)) { 370 | if (key1.production.has_error_recovery_tail) { 371 | grammar_items[key1] -= intersection; 372 | } else if (key2.production.has_error_recovery_tail) { 373 | grammar_items[key2] -= intersection; 374 | } else { 375 | // Default: resolve in favour of first declared 376 | // production but mark as unresolved giving the 377 | // user the option of accepting the resolution 378 | // or not (down the track) 379 | grammar_items[key2] -= intersection; 380 | reduce_reduce_conflicts ~= ReduceReduceConflict([key1, key2], intersection); 381 | } 382 | } 383 | } 384 | } 385 | return reduce_reduce_conflicts.length; 386 | } 387 | 388 | Set!TokenSymbol get_look_ahead_set() 389 | { 390 | auto look_ahead_set = extract_key_set(shift_list); 391 | foreach (key; grammar_items.get_reducible_keys()) { 392 | look_ahead_set |= grammar_items[key]; 393 | } 394 | return look_ahead_set; 395 | } 396 | 397 | string[] generate_action_code_text() 398 | { 399 | string[] code_text_lines = ["switch (dd_next_token) {"]; 400 | string expected_tokens_list; 401 | auto shift_token_set = extract_key_set(shift_list); 402 | foreach (token; shift_token_set) { 403 | code_text_lines ~= format("case %s: return dd_shift!(%s);", token.name, shift_list[token].id); 404 | } 405 | auto item_keys = grammar_items.get_reducible_keys(); 406 | if (item_keys.cardinality == 0) { 407 | assert(shift_token_set.cardinality > 0); 408 | expected_tokens_list = token_list_string(shift_token_set.elements); 409 | } else { 410 | struct Pair { Set!TokenSymbol look_ahead_set; Set!GrammarItemKey production_set; }; 411 | Pair[] pairs; 412 | auto combined_look_ahead = Set!TokenSymbol(); 413 | foreach (item_key; item_keys) { 414 | combined_look_ahead |= grammar_items[item_key]; 415 | } 416 | expected_tokens_list = token_list_string((shift_token_set | combined_look_ahead).elements); 417 | foreach (token; combined_look_ahead) { 418 | auto production_set = Set!GrammarItemKey(); 419 | foreach (item_key; item_keys) { 420 | if (grammar_items[item_key].contains(token)) { 421 | production_set += item_key; 422 | } 423 | } 424 | auto i = 0; 425 | for (i = 0; i < pairs.length; i++) { 426 | if (pairs[i].production_set == production_set) break; 427 | } 428 | if (i < pairs.length) { 429 | pairs[i].look_ahead_set 
+= token; 430 | } else { 431 | pairs ~= Pair(Set!TokenSymbol(token), production_set); 432 | } 433 | } 434 | foreach (pair; pairs) { 435 | code_text_lines ~= format("case %s:", token_list_string(pair.look_ahead_set.elements)); 436 | auto keys = pair.production_set.elements; 437 | if (trivially_true(keys[0].production.predicate)) { 438 | assert(keys.length == 1); 439 | if (keys[0].production.id == 0) { 440 | code_text_lines ~= " return dd_accept!();"; 441 | } else { 442 | code_text_lines ~= format(" return dd_reduce!(%s); // %s", keys[0].production.id, keys[0].production); 443 | } 444 | } else { 445 | code_text_lines ~= format(" if (%s) {", keys[0].production.expanded_predicate); 446 | code_text_lines ~= format(" return dd_reduce!(%s); // %s", keys[0].production.id, keys[0].production); 447 | if (keys.length == 1) { 448 | code_text_lines ~= " } else {"; 449 | code_text_lines ~= format(" throw new DDSyntaxError([%s]);", expected_tokens_list); 450 | code_text_lines ~= " }"; 451 | continue; 452 | } 453 | for (auto i = 1; i < keys.length - 1; i++) { 454 | assert(!trivially_true(keys[i].production.predicate)); 455 | code_text_lines ~= format(" } else if (%s) {", keys[i].production.expanded_predicate); 456 | code_text_lines ~= format(" return dd_reduce!(%s); // %s", keys[i].production.id, keys[i].production); 457 | } 458 | if (trivially_true(keys[$ - 1].production.predicate)) { 459 | code_text_lines ~= " } else {"; 460 | if (keys[$ - 1].production.id == 0) { 461 | code_text_lines ~= " return dd_accept;"; 462 | } else { 463 | code_text_lines ~= format(" return dd_reduce!(%s); // %s", keys[$ - 1].production.id, keys[$ - 1].production); 464 | } 465 | code_text_lines ~= " }"; 466 | } else { 467 | code_text_lines ~= format(" } else if (%s) {", keys[$ - 1].production.expanded_predicate); 468 | code_text_lines ~= format(" return dd_reduce!(%s); // %s", keys[$ - 1].production.id, keys[$ - 1].production); 469 | code_text_lines ~= " } else {"; 470 | code_text_lines ~= format(" throw new DDSyntaxError([%s]);", expected_tokens_list); 471 | code_text_lines ~= " }"; 472 | } 473 | } 474 | } 475 | } 476 | code_text_lines ~= "default:"; 477 | code_text_lines ~= format(" throw new DDSyntaxError([%s]);", expected_tokens_list); 478 | code_text_lines ~= "}"; 479 | return code_text_lines; 480 | } 481 | 482 | string[] generate_goto_code_text() 483 | { 484 | string[] code_text_lines = ["switch (dd_non_terminal) {"]; 485 | foreach (symbol; goto_table.keys.wa_sort) { 486 | code_text_lines ~= format("case %s: return %s;", symbol.name, goto_table[symbol].id); 487 | } 488 | code_text_lines ~= "default:"; 489 | code_text_lines ~= format(" throw new Exception(format(\"Malformed goto table: no entry for (%%s , %s)\", dd_non_terminal));", id); 490 | code_text_lines ~= "}"; 491 | return code_text_lines; 492 | } 493 | 494 | string get_description() 495 | { 496 | auto str = format("State<%s>:\n Grammar Items:\n", id); 497 | foreach (item_key; grammar_items.keys.wa_sort) { 498 | str ~= format(" %s: %s\n", item_key, grammar_items[item_key]); 499 | } 500 | auto look_ahead_set = get_look_ahead_set(); 501 | str ~= format(" Parser Action Table:\n"); 502 | if (look_ahead_set.cardinality == 0) { 503 | str ~= " \n"; 504 | } else { 505 | auto reducable_item_keys = grammar_items.get_reducible_keys(); 506 | foreach (token; look_ahead_set) { 507 | if (token in shift_list) { 508 | str ~= format(" %s: shift: -> State<%s>\n", token, shift_list[token].id); 509 | } else { 510 | foreach (reducible_item_key; reducable_item_keys) { 511 | if 
(grammar_items[reducible_item_key].contains(token)) { 512 | str ~= format(" %s: reduce: %s\n", token, reducible_item_key.production); 513 | } 514 | } 515 | } 516 | } 517 | } 518 | str ~= " Go To Table:\n"; 519 | if (goto_table.length == 0) { 520 | str ~= " \n"; 521 | } else { 522 | foreach (non_terminal; goto_table.keys.wa_sort) { 523 | str ~= format(" %s -> %s\n", non_terminal, goto_table[non_terminal]); 524 | } 525 | } 526 | if (error_recovery_state is null) { 527 | str ~= " Error Recovery State: \n"; 528 | } else { 529 | str ~= format(" Error Recovery State: State<%s>\n", error_recovery_state.id); 530 | str ~= format(" Look Ahead: %s\n", error_recovery_state.get_look_ahead_set()); 531 | } 532 | if (shift_reduce_conflicts.length > 0) { 533 | str ~= " Shift/Reduce Conflicts:\n"; 534 | foreach (src; shift_reduce_conflicts) { 535 | str ~= format(" %s:\n", src.shift_symbol); 536 | str ~= format(" shift -> State<%s>\n", src.goto_state.id); 537 | str ~= format(" reduce %s : %s\n", src.reducible_item.production, src.look_ahead_set); 538 | } 539 | } 540 | if (reduce_reduce_conflicts.length > 0) { 541 | str ~= " Reduce/Reduce Conflicts:\n"; 542 | foreach (rrc; reduce_reduce_conflicts) { 543 | str ~= format(" %s:\n", rrc.look_ahead_set_intersection); 544 | str ~= format(" reduce %s : %s\n", rrc.reducible_item[0].production, grammar_items[rrc.reducible_item[0]]); 545 | str ~= format(" reduce %s : %s\n", rrc.reducible_item[1].production, grammar_items[rrc.reducible_item[1]]); 546 | } 547 | } 548 | return str; 549 | } 550 | 551 | override string toString() 552 | { 553 | return format("State<%s>", id); 554 | } 555 | } 556 | 557 | class GrammarSpecification { 558 | SymbolTable symbol_table; 559 | Production[] production_list; 560 | string header_code_text; 561 | string preamble_code_text; 562 | string coda_code_text; 563 | 564 | invariant () { 565 | for (auto i = 0; i < production_list.length; i++) assert(production_list[i].id == i); 566 | } 567 | 568 | this() 569 | { 570 | symbol_table = SymbolTable(); 571 | // Set the right hand side when start symbol is known. 
572 | auto dummy_prod = new_production(symbol_table.get_special_symbol(SpecialSymbols.start), [], null, null); 573 | assert(dummy_prod.id == 0); 574 | } 575 | 576 | @property 577 | ProductionId next_production_id() 578 | { 579 | return cast(ProductionId) production_list.length; 580 | } 581 | 582 | Production new_production(NonTerminalSymbol lhs, Symbol[] rhs, Predicate pred, SemanticAction actn, AssociativePrecedence aprec=AssociativePrecedence()) 583 | { 584 | auto new_prodn = new Production(next_production_id, lhs, rhs, pred, actn, aprec); 585 | production_list ~= new_prodn; 586 | if (new_prodn.id == 1) { 587 | production_list[0].right_hand_side = [new_prodn.left_hand_side]; 588 | new_prodn.left_hand_side.used_at ~= new_prodn.left_hand_side.defined_at; 589 | } 590 | return new_prodn; 591 | } 592 | 593 | void set_header(string header) 594 | { 595 | header_code_text = header; 596 | } 597 | 598 | void set_preamble(string preamble) 599 | { 600 | preamble_code_text = preamble; 601 | } 602 | 603 | void set_coda(string coda) 604 | { 605 | coda_code_text = coda; 606 | } 607 | 608 | Set!TokenSymbol FIRST(Symbol[] symbol_string, TokenSymbol token) 609 | { 610 | auto token_set = Set!TokenSymbol(); 611 | foreach (symbol; symbol_string) { 612 | auto firsts_data = get_firsts_data(symbol); 613 | token_set |= firsts_data.tokenset; 614 | if (!firsts_data.transparent) { 615 | return token_set; 616 | } 617 | } 618 | token_set += token; 619 | return token_set; 620 | } 621 | 622 | FirstsData get_firsts_data(ref Symbol symbol) 623 | { 624 | if (symbol.firsts_data is null ) { 625 | auto token_set = Set!TokenSymbol(); 626 | auto transparent = false; 627 | if (symbol.type == SymbolType.token) { 628 | token_set += symbol; 629 | } else if (symbol.type == SymbolType.non_terminal) { 630 | // We need to establish transparency first 631 | Production[] relevant_productions; 632 | foreach (production; production_list) { 633 | if (production.left_hand_side != symbol) continue; 634 | transparent = transparent || (production.length == 0); 635 | relevant_productions ~= production; 636 | } 637 | bool transparency_changed; 638 | do { 639 | // TODO: modify this to only redo those before the change in transparency 640 | transparency_changed = false; 641 | foreach (production; relevant_productions) { 642 | auto transparent_production = true; 643 | foreach (rhs_symbol; production.right_hand_side) { 644 | if (rhs_symbol == symbol) { 645 | if (transparent) { 646 | continue; 647 | } else { 648 | transparent_production = false; 649 | break; 650 | } 651 | } 652 | auto firsts_data = get_firsts_data(rhs_symbol); 653 | token_set |= firsts_data.tokenset; 654 | if (!firsts_data.transparent) { 655 | transparent_production = false; 656 | break; 657 | } 658 | } 659 | if (transparent_production) { 660 | transparency_changed = !transparent; 661 | transparent = true; 662 | } 663 | } 664 | } while (transparency_changed); 665 | } 666 | symbol.firsts_data = new FirstsData(token_set, transparent); 667 | } 668 | return symbol.firsts_data; 669 | } 670 | 671 | GrammarItemSet closure(GrammarItemSet closure_set) 672 | { 673 | // NB: if called with an lValue arg that lValue will also be modified 674 | bool additions_made; 675 | do { 676 | additions_made = false; 677 | foreach (item_key, look_ahead_set; closure_set) { 678 | if (!item_key.is_closable) continue; 679 | auto prospective_lhs = item_key.next_symbol; 680 | foreach (look_ahead_symbol; look_ahead_set) { 681 | auto firsts = FIRST(item_key.tail, look_ahead_symbol); 682 | foreach (production; 
production_list) { 683 | if (prospective_lhs != production.left_hand_side) continue; 684 | auto prospective_key = GrammarItemKey(production); 685 | if (prospective_key in closure_set) { 686 | auto cardinality = closure_set[prospective_key].cardinality; 687 | closure_set[prospective_key] |= firsts; 688 | additions_made = additions_made || closure_set[prospective_key].cardinality > cardinality; 689 | } else { 690 | // NB: need clone to ensure each GrammarItems don't share look ahead sets 691 | closure_set[prospective_key] = firsts.clone(); 692 | additions_made = true; 693 | } 694 | } 695 | } 696 | } 697 | } while (additions_made); 698 | return closure_set; 699 | } 700 | 701 | string[] get_description() 702 | { 703 | auto text_lines = symbol_table.get_description(); 704 | text_lines ~= "Productions:"; 705 | foreach (pdn; production_list) { 706 | text_lines ~= format(" %s: %s: %s: %s", pdn.id, pdn, pdn.associativity, pdn.precedence); 707 | } 708 | return text_lines; 709 | } 710 | } 711 | 712 | string quote_raw(string str) 713 | { 714 | static auto re = regex(r"`", "g"); 715 | return "`" ~ replace(str, re, "`\"`\"`") ~ "`"; 716 | } 717 | 718 | class Grammar { 719 | GrammarSpecification spec; 720 | ParserState[] parser_states; 721 | Set!(ParserState)[ParserState][NonTerminalSymbol] goto_table; 722 | ParserStateId[] empty_look_ahead_sets; 723 | size_t unresolved_sr_conflicts; 724 | size_t unresolved_rr_conflicts; 725 | 726 | invariant () { 727 | for (auto i = 0; i < parser_states.length; i++) assert(parser_states[i].id == i); 728 | } 729 | 730 | @property 731 | size_t total_unresolved_conflicts() 732 | { 733 | return unresolved_rr_conflicts + unresolved_sr_conflicts; 734 | } 735 | 736 | @property 737 | ParserStateId next_parser_state_id() 738 | { 739 | return cast(ParserStateId) parser_states.length; 740 | } 741 | 742 | ParserState new_parser_state(GrammarItemSet kernel) 743 | { 744 | auto nps = new ParserState(next_parser_state_id, kernel); 745 | parser_states ~= nps; 746 | return nps; 747 | } 748 | 749 | this(GrammarSpecification specification) 750 | { 751 | spec = specification; 752 | auto start_item_key = GrammarItemKey(spec.production_list[0]); 753 | auto start_look_ahead_set = Set!(TokenSymbol)(spec.symbol_table.get_special_symbol(SpecialSymbols.end)); 754 | GrammarItemSet start_kernel = spec.closure([ start_item_key : start_look_ahead_set]); 755 | auto start_state = new_parser_state(start_kernel); 756 | while (true) { 757 | // Find a state that needs processing or quit 758 | ParserState unprocessed_state = null; 759 | // Go through states in id order 760 | foreach (parser_state; parser_states) { 761 | if (parser_state.state != ProcessedState.processed) { 762 | unprocessed_state = parser_state; 763 | break; 764 | } 765 | } 766 | if (unprocessed_state is null) break; 767 | 768 | auto first_time = unprocessed_state.state == ProcessedState.unprocessed; 769 | unprocessed_state.state = ProcessedState.processed; 770 | auto already_done = Set!Symbol(); 771 | // do items in order 772 | foreach (item_key; unprocessed_state.grammar_items.keys.wa_sort){ 773 | if (item_key.is_reducible) continue; 774 | ParserState goto_state; 775 | auto symbol_x = item_key.next_symbol; 776 | if (already_done.contains(symbol_x)) continue; 777 | already_done += symbol_x; 778 | auto item_set_x = spec.closure(unprocessed_state.grammar_items.generate_goto_kernel(symbol_x)); 779 | auto equivalent_state = find_equivalent_state(item_set_x); 780 | if (equivalent_state is null) { 781 | goto_state = new_parser_state(item_set_x); 
782 | } else { 783 | foreach (item_key, look_ahead_set; item_set_x) { 784 | if (!equivalent_state.grammar_items[item_key].is_superset_of(look_ahead_set)) { 785 | equivalent_state.grammar_items[item_key] |= look_ahead_set; 786 | if (equivalent_state.state == ProcessedState.processed) { 787 | equivalent_state.state = ProcessedState.needs_reprocessing; 788 | } 789 | } 790 | } 791 | goto_state = equivalent_state; 792 | } 793 | if (first_time) { 794 | if (symbol_x.type == SymbolType.token) { 795 | unprocessed_state.shift_list[symbol_x] = goto_state; 796 | } else { 797 | assert(symbol_x !in unprocessed_state.goto_table); 798 | unprocessed_state.goto_table[symbol_x] = goto_state; 799 | } 800 | if (symbol_x.id == SpecialSymbols.parse_error) { 801 | unprocessed_state.error_recovery_state = goto_state; 802 | } 803 | } 804 | } 805 | 806 | } 807 | foreach (parser_state; parser_states) { 808 | if (parser_state.get_look_ahead_set().cardinality == 0) { 809 | empty_look_ahead_sets ~= parser_state.id; 810 | } 811 | unresolved_sr_conflicts += parser_state.resolve_shift_reduce_conflicts(); 812 | unresolved_rr_conflicts += parser_state.resolve_reduce_reduce_conflicts(); 813 | } 814 | } 815 | 816 | ParserState find_equivalent_state(GrammarItemSet grammar_item_set) 817 | { 818 | // TODO: check if this needs to use only kernel keys 819 | auto target_key_set = grammar_item_set.get_kernel_keys(); 820 | foreach (parser_state; parser_states) { 821 | if (target_key_set == parser_state.grammar_items.get_kernel_keys()) { 822 | return parser_state; 823 | } 824 | } 825 | return null; 826 | } 827 | 828 | string[] generate_symbol_enum_code_text() 829 | { 830 | // TODO: determine type for DDSymbol from maximum symbol id 831 | string[] text_lines = ["alias ushort DDSymbol;\n"]; 832 | text_lines ~= "enum DDHandle : DDSymbol {"; 833 | foreach (token; spec.symbol_table.get_special_tokens_ordered()) { 834 | text_lines ~= format(" %s = %s,", token.name, token.id); 835 | } 836 | auto ordered_tokens = spec.symbol_table.get_tokens_ordered(); 837 | foreach (token; ordered_tokens) { 838 | text_lines ~= format(" %s = %s,", token.name, token.id); 839 | } 840 | text_lines ~= "}\n"; 841 | text_lines ~= "string dd_literal_token_string(DDHandle dd_token)"; 842 | text_lines ~= "{"; 843 | text_lines ~= " with (DDHandle) switch (dd_token) {"; 844 | foreach (token; ordered_tokens) { 845 | if (token.pattern[0] == '"') { 846 | text_lines ~= format(" case %s: return %s; break;", token.name, token.pattern); 847 | } 848 | } 849 | text_lines ~= " default:"; 850 | text_lines ~= " }"; 851 | text_lines ~= " return null;"; 852 | text_lines ~= "}\n"; 853 | text_lines ~= "enum DDNonTerminal : DDSymbol {"; 854 | foreach (non_terminal; spec.symbol_table.get_special_non_terminals_ordered()) { 855 | text_lines ~= format(" %s = %s,", non_terminal.name, non_terminal.id); 856 | } 857 | foreach (non_terminal; spec.symbol_table.get_non_terminals_ordered()) { 858 | text_lines ~= format(" %s = %s,", non_terminal.name, non_terminal.id); 859 | } 860 | text_lines ~= "}\n"; 861 | return text_lines; 862 | } 863 | 864 | string[] generate_lexan_token_code_text() 865 | { 866 | string[] text_lines = ["static DDLexicalAnalyser dd_lexical_analyser;"]; 867 | text_lines ~= "static this() {"; 868 | text_lines ~= " static auto dd_lit_lexemes = ["; 869 | foreach (token; spec.symbol_table.get_tokens_ordered()) { 870 | if (!(token.pattern[0] == '"' && token.pattern[$-1] == '"')) continue; 871 | text_lines ~= format(" DDLiteralLexeme(DDHandle.%s, %s),", token.name, token.pattern); 872 
| } 873 | text_lines ~= " ];\n"; 874 | text_lines ~= " static auto dd_regex_lexemes = ["; 875 | foreach (token; spec.symbol_table.get_tokens_ordered()) { 876 | if ((token.pattern[0] == '"' && token.pattern[$-1] == '"')) continue; 877 | text_lines ~= format(" DDRegexLexeme!(DDHandle.%s, %s),", token.name, quote_raw(token.pattern)); 878 | } 879 | text_lines ~= " ];\n"; 880 | text_lines ~= " static auto dd_skip_rules = ["; 881 | foreach (rule; spec.symbol_table.get_skip_rules()) { 882 | text_lines ~= format(" regex(%s),", quote_raw("^" ~ rule)); 883 | } 884 | text_lines ~= " ];\n"; 885 | text_lines ~= " dd_lexical_analyser = new DDLexicalAnalyser(dd_lit_lexemes, dd_regex_lexemes, dd_skip_rules);"; 886 | text_lines ~= "}\n"; 887 | return text_lines; 888 | } 889 | 890 | string[] generate_attributes_code_text() 891 | { 892 | string[] text_lines = ["struct DDAttributes {"]; 893 | text_lines ~= " DDCharLocation dd_location;"; 894 | text_lines ~= " string dd_matched_text;"; 895 | auto fields = spec.symbol_table.get_field_definitions(); 896 | if (fields.length > 0) { 897 | text_lines ~= " union {"; 898 | text_lines ~= " DDSyntaxErrorData dd_syntax_error_data;"; 899 | foreach (field; fields) { 900 | text_lines ~= format(" %s %s;", field.field_type, field.field_name); 901 | } 902 | text_lines ~= " }\n"; 903 | } else { 904 | text_lines ~= " DDSyntaxErrorData dd_syntax_error_data;\n"; 905 | } 906 | text_lines ~= " this (DDToken token)"; 907 | text_lines ~= " {"; 908 | text_lines ~= " dd_location = token.location;"; 909 | text_lines ~= " dd_matched_text = token.matched_text;"; 910 | text_lines ~= " if (token.is_valid_match) {"; 911 | text_lines ~= " dd_set_attribute_value(this, token.handle, token.matched_text);"; 912 | text_lines ~= " }"; 913 | text_lines ~= " }"; 914 | text_lines ~= "}\n\n"; 915 | text_lines ~= "void dd_set_attribute_value(ref DDAttributes attrs, DDHandle dd_token, string text)"; 916 | text_lines ~= "{"; 917 | Set!TokenSymbol[string] token_sets; 918 | foreach(token; spec.symbol_table.get_tokens_ordered()) { 919 | if (token.field_name.length > 0) { 920 | if (token.field_name !in token_sets) { 921 | token_sets[token.field_name] = Set!TokenSymbol(token); 922 | } else { 923 | token_sets[token.field_name] += token; 924 | } 925 | } 926 | } 927 | if (token_sets.length > 0) { 928 | text_lines ~= " with (DDHandle) switch (dd_token) {"; 929 | foreach (field; fields) { 930 | if (field.field_name in token_sets) { 931 | text_lines ~= format(" case %s:", token_list_string(token_sets[field.field_name].elements)); 932 | if (field.conversion_function_name.length > 0) { 933 | text_lines ~= format(" attrs.%s = %s(text);", field.field_name, field.conversion_function_name); 934 | } else { 935 | text_lines ~= format(" attrs.%s = to!(%s)(text);", field.field_name, field.field_type); 936 | } 937 | text_lines ~= " break;"; 938 | } 939 | } 940 | text_lines ~= " default:"; 941 | text_lines ~= " // Do nothing"; 942 | text_lines ~= " }"; 943 | } 944 | text_lines ~= "}\n"; 945 | return text_lines; 946 | } 947 | 948 | string[] generate_production_data_code_text() 949 | { 950 | string[] text_lines = ["alias uint DDProduction;"]; 951 | text_lines ~= "DDProductionData dd_get_production_data(DDProduction dd_production)"; 952 | text_lines ~= "{"; 953 | text_lines ~= " with (DDNonTerminal) switch(dd_production) {"; 954 | foreach (production; spec.production_list) { 955 | text_lines ~= format(" case %s: return DDProductionData(%s, %s);", production.id, production.left_hand_side.name, production.right_hand_side.length); 956 
| } 957 | text_lines ~= " default:"; 958 | text_lines ~= " throw new Exception(\"Malformed production data table\");"; 959 | text_lines ~= " }"; 960 | text_lines ~= " assert(false);"; 961 | text_lines ~= "}\n"; 962 | return text_lines; 963 | } 964 | 965 | string[] generate_semantic_code_text() 966 | { 967 | string[] text_lines = ["void"]; 968 | text_lines ~= "dd_do_semantic_action(ref DDAttributes dd_lhs, DDProduction dd_production, DDAttributes[] dd_args, void delegate(string, string) dd_inject)"; 969 | text_lines ~= "{"; 970 | text_lines ~= " switch(dd_production) {"; 971 | foreach (production; spec.production_list) { 972 | if (production.action.length > 0) { 973 | text_lines ~= format(" case %s: // %s", production.id, production); 974 | text_lines ~= production.expanded_semantic_action; 975 | text_lines ~= " break;"; 976 | } 977 | } 978 | text_lines ~= " default:"; 979 | text_lines ~= " // Do nothing"; 980 | text_lines ~= " }"; 981 | text_lines ~= "}\n"; 982 | return text_lines; 983 | } 984 | 985 | string[] generate_action_table_code_text() 986 | { 987 | string[] code_text_lines = []; 988 | code_text_lines ~= "DDParseAction dd_get_next_action(DDParserState dd_current_state, DDHandle dd_next_token, in DDAttributes[] dd_attribute_stack)"; 989 | code_text_lines ~= "{"; 990 | code_text_lines ~= " with (DDHandle) switch(dd_current_state) {"; 991 | // Do this in state id order 992 | auto indent = " "; 993 | foreach (parser_state; parser_states) { 994 | code_text_lines ~= format(" case %s:", parser_state.id); 995 | foreach (line; parser_state.generate_action_code_text()) { 996 | auto indented_line = indent ~ line; 997 | code_text_lines ~= indented_line; 998 | } 999 | code_text_lines ~= " break;"; 1000 | } 1001 | code_text_lines ~= " default:"; 1002 | code_text_lines ~= " throw new Exception(format(\"Invalid parser state: %s\", dd_current_state));"; 1003 | code_text_lines ~= " }"; 1004 | code_text_lines ~= " assert(false);"; 1005 | code_text_lines ~= "}\n"; 1006 | return code_text_lines; 1007 | } 1008 | 1009 | string[] generate_goto_table_code_text() 1010 | { 1011 | string[] code_text_lines = ["alias uint DDParserState;"]; 1012 | code_text_lines ~= "DDParserState dd_get_goto_state(DDNonTerminal dd_non_terminal, DDParserState dd_current_state)"; 1013 | code_text_lines ~= "{"; 1014 | code_text_lines ~= " with (DDNonTerminal) switch(dd_current_state) {"; 1015 | // Do this in state id order 1016 | auto indent = " "; 1017 | foreach (parser_state; parser_states) { 1018 | if (parser_state.goto_table.length == 0) continue; 1019 | code_text_lines ~= format(" case %s:", parser_state.id); 1020 | foreach (line; parser_state.generate_goto_code_text()) { 1021 | auto indented_line = indent ~ line; 1022 | code_text_lines ~= indented_line; 1023 | } 1024 | code_text_lines ~= " break;"; 1025 | } 1026 | code_text_lines ~= " default:"; 1027 | code_text_lines ~= " throw new Exception(format(\"Malformed goto table: no entry for (%s, %s).\", dd_non_terminal, dd_current_state));"; 1028 | code_text_lines ~= " }"; 1029 | code_text_lines ~= " throw new Exception(format(\"Malformed goto table: no entry for (%s, %s).\", dd_non_terminal, dd_current_state));"; 1030 | code_text_lines ~= "}\n"; 1031 | return code_text_lines; 1032 | } 1033 | 1034 | string[] generate_error_recovery_code_text() 1035 | { 1036 | string[] code_text_lines = ["bool dd_error_recovery_ok(DDParserState dd_parser_state, DDHandle dd_token)"]; 1037 | code_text_lines ~= "{"; 1038 | code_text_lines ~= " with (DDHandle) switch(dd_parser_state) {"; 1039 | // Do 
this in state id order 1040 | foreach (parser_state; parser_states) { 1041 | if (parser_state.error_recovery_state is null) continue; 1042 | auto error_recovery_set = Set!TokenSymbol(); 1043 | foreach (item_key, look_ahead_set; parser_state.error_recovery_state.grammar_items) { 1044 | if (item_key.dot > 0 && item_key.production.right_hand_side[item_key.dot - 1].id == SpecialSymbols.parse_error) { 1045 | error_recovery_set |= look_ahead_set; 1046 | } 1047 | } 1048 | if (error_recovery_set.cardinality > 0) { 1049 | code_text_lines ~= format(" case %s:", parser_state.id); 1050 | code_text_lines ~= " switch (dd_token) {"; 1051 | code_text_lines ~= format(" case %s:", token_list_string(error_recovery_set.elements)); 1052 | code_text_lines ~= " return true;"; 1053 | code_text_lines ~= " default:"; 1054 | code_text_lines ~= " return false;"; 1055 | code_text_lines ~= " }"; 1056 | code_text_lines ~= " break;"; 1057 | } 1058 | } 1059 | code_text_lines ~= " default:"; 1060 | code_text_lines ~= " }"; 1061 | code_text_lines ~= " return false;"; 1062 | code_text_lines ~= "}\n"; 1063 | return code_text_lines; 1064 | } 1065 | 1066 | void write_parser_code(File output_file, string module_name="") 1067 | in { 1068 | assert(output_file.isOpen); 1069 | assert(output_file.size == 0); 1070 | } 1071 | body { 1072 | if (module_name.length > 0) { 1073 | output_file.writefln("module %s;\n", module_name); 1074 | } 1075 | output_file.writeln(stripLeft(spec.header_code_text)); 1076 | output_file.writeln("import std.regex;\nimport std.string;\nimport std.conv;\n"); 1077 | output_file.writeln("import ddlib.templates;\n"); 1078 | output_file.writeln("mixin DDParserSupport;\n"); 1079 | foreach (line; generate_symbol_enum_code_text()) { 1080 | output_file.writeln(line); 1081 | } 1082 | foreach (line; generate_lexan_token_code_text()) { 1083 | output_file.writeln(line); 1084 | } 1085 | foreach (line; generate_production_data_code_text()) { 1086 | output_file.writeln(line); 1087 | } 1088 | foreach (line; generate_attributes_code_text()) { 1089 | output_file.writeln(line); 1090 | } 1091 | foreach (line; generate_goto_table_code_text()) { 1092 | output_file.writeln(line); 1093 | } 1094 | foreach (line; generate_error_recovery_code_text()) { 1095 | output_file.writeln(line); 1096 | } 1097 | output_file.writeln(spec.preamble_code_text); 1098 | foreach (line; generate_semantic_code_text()) { 1099 | output_file.writeln(line); 1100 | } 1101 | foreach (line; generate_action_table_code_text()) { 1102 | output_file.writeln(line); 1103 | } 1104 | output_file.writeln("\nmixin DDImplementParser;"); 1105 | output_file.writeln(spec.coda_code_text.stripRight); 1106 | output_file.close(); 1107 | } 1108 | 1109 | string get_parser_states_description() 1110 | { 1111 | string str; 1112 | foreach (parser_state; parser_states) { 1113 | str ~= parser_state.get_description(); 1114 | } 1115 | return str; 1116 | } 1117 | } 1118 | -------------------------------------------------------------------------------- /idnumber.d: -------------------------------------------------------------------------------- 1 | // idnumber.d 2 | // 3 | // Copyright Peter Williams 2013 . 4 | // 5 | // Distributed under the Boost Software License, Version 1.0. 
6 | // (See accompanying file LICENSE_1_0.txt or copy at
7 | // http://www.boost.org/LICENSE_1_0.txt)
8 | 
9 | module idnumber;
10 | 
11 | mixin template IdNumber(T) {
12 |     const T id;
13 | 
14 |     override hash_t toHash()
15 |     {
16 |         return id;
17 |     }
18 | 
19 |     override bool opEquals(Object o)
20 |     {
21 |         return id == (cast(typeof(this)) o).id;
22 |     }
23 | 
24 |     override int opCmp(Object o)
25 |     {
26 |         return id - (cast(typeof(this)) o).id;
27 |     }
28 | }
29 | 
30 | mixin template UniqueId(T) {
31 |     mixin IdNumber!(T);
32 |     protected static T next_id;
33 | 
34 |     const string set_unique_id = "id = next_id++;";
35 | }
36 | 
--------------------------------------------------------------------------------
/sets.d:
--------------------------------------------------------------------------------
1 | // sets.d
2 | //
3 | // Copyright Peter Williams 2016 .
4 | //
5 | // Distributed under the Boost Software License, Version 1.0.
6 | // (See accompanying file LICENSE_1_0.txt or copy at
7 | // http://www.boost.org/LICENSE_1_0.txt)
8 | 
9 | module sets;
10 | 
11 | import std.string: format;
12 | import std.algorithm: copy, find;
13 | import std.traits: isAssignable;
14 | import std.range: retro;
15 | 
16 | import workarounds: wa_sort;
17 | 
18 | // for use in unit tests
19 | mixin template DummyClass() {
20 |     class Dummy {
21 |         int val;
22 |         this(int ival) { val = ival; }
23 |         override int opCmp(Object o)
24 |         {
25 |             return val - (cast(Dummy) o).val;
26 |         }
27 |         override bool opEquals(Object o)
28 |         {
29 |             return val == (cast(Dummy) o).val;
30 |         }
31 |         override string toString() const {
32 |             return format("Dummy(%s)", val);
33 |         }
34 |     }
35 | 
36 |     Dummy[] list_of_dummies(in int[] int_values...)
37 |     body {
38 |         Dummy[] list;
39 |         foreach (int_value; int_values) {
40 |             list ~= new Dummy(int_value);
41 |         }
42 |         return list;
43 |     }
44 | }
45 | 
46 | // Does this list contain the item? For use in contracts.
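// NB: this contains() does a linear scan (std.algorithm.find) and so makes no
// ordering assumptions, which keeps it safe to call on any list from inside a
// contract; callers with ordered lists should prefer binary_search() below.
// For example (mirroring the unittest): contains([1, 3, 9, 12], 3) is true
// and contains([1, 3, 9, 12], 5) is false.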
47 | private bool contains(T)(in T[] list, in T item) 48 | { 49 | auto tail = find(list, item); 50 | return tail.length && tail[0] == item; 51 | } 52 | unittest { 53 | assert(contains([1, 3, 9, 12], 3)); 54 | assert(!contains([1, 3, 9, 12], 5)); 55 | } 56 | 57 | private bool is_ordered(T)(in T[] list) 58 | { 59 | for (auto j = 1; j < list.length; j++) { 60 | static if (is(T == class)) { // WORKAROUND: class opCmp() design flaw 61 | if (cast(T) list[j - 1] > cast(T) list[j]) return false; 62 | } else { 63 | if (list[j - 1] > list[j]) return false; 64 | } 65 | } 66 | return true; 67 | } 68 | unittest { 69 | assert(is_ordered(new int[0])); 70 | assert(is_ordered([1])); 71 | assert(is_ordered([1, 1])); 72 | assert(is_ordered([1, 2, 3, 4, 5, 6, 7])); 73 | assert(!is_ordered([1, 2, 4, 3, 5, 6, 7])); 74 | assert(is_ordered([1, 2, 3, 5, 5, 6, 7])); 75 | mixin DummyClass; 76 | // first test list of dummies works 77 | assert(list_of_dummies(6, 2, 3, 4, 5, 6, 7) == [new Dummy(6), new Dummy(2), new Dummy(3), new Dummy(4), new Dummy(5), new Dummy(6), new Dummy(7)]); 78 | assert(is_ordered([new Dummy(1), new Dummy(2), new Dummy(3), new Dummy(4), new Dummy(5), new Dummy(6), new Dummy(7)])); 79 | assert(!is_ordered([new Dummy(1), new Dummy(2), new Dummy(4), new Dummy(3), new Dummy(5), new Dummy(6), new Dummy(7)])); 80 | assert(is_ordered([new Dummy(1), new Dummy(2), new Dummy(3), new Dummy(5), new Dummy(5), new Dummy(6), new Dummy(7)])); 81 | } 82 | 83 | private bool is_ordered_no_dups(T)(in T[] list) 84 | { 85 | for (auto j = 1; j < list.length; j++) { 86 | static if (is(T == class)) { // WORKAROUND: class opCmp() design flaw 87 | if (cast(T) list[j - 1] >= cast(T) list[j]) return false; 88 | } else { 89 | if (list[j - 1] >= list[j]) return false; 90 | } 91 | } 92 | return true; 93 | } 94 | unittest { 95 | assert(is_ordered_no_dups(new int[0])); 96 | assert(is_ordered_no_dups([1])); 97 | assert(!is_ordered_no_dups([1, 1])); 98 | assert(is_ordered_no_dups([1, 2, 3, 4, 5, 6, 7])); 99 | assert(!is_ordered_no_dups([1, 2, 4, 3, 5, 6, 7])); 100 | assert(!is_ordered_no_dups([1, 2, 3, 5, 5, 6, 7])); 101 | mixin DummyClass; 102 | assert(is_ordered_no_dups([new Dummy(1), new Dummy(2), new Dummy(3), new Dummy(4), new Dummy(5), new Dummy(6), new Dummy(7)])); 103 | assert(!is_ordered_no_dups([new Dummy(1), new Dummy(2), new Dummy(4), new Dummy(3), new Dummy(5), new Dummy(6), new Dummy(7)])); 104 | assert(!is_ordered_no_dups([new Dummy(1), new Dummy(2), new Dummy(3), new Dummy(5), new Dummy(5), new Dummy(6), new Dummy(7)])); 105 | } 106 | 107 | private T[] remove_adj_dups(T)(T[] list) 108 | in { 109 | assert(is_ordered(list)); 110 | } 111 | out (result) { 112 | for (auto i = 1; i < result.length; i++) assert(result[i - 1] != result[i]); 113 | foreach (item; list) assert(result.contains(item)); 114 | } 115 | body { 116 | if (list.length > 1) { 117 | // Remove any duplicates 118 | size_t last_index = 0; 119 | for (size_t index = 1; index < list.length; index++) { 120 | if (list[index] != list[last_index]) { 121 | list[++last_index] = list[index]; 122 | } 123 | } 124 | list.length = last_index + 1; 125 | } 126 | return list; 127 | } 128 | unittest { 129 | int[] empty; 130 | assert(remove_adj_dups(empty) == []); 131 | int[] single = [1]; 132 | assert(remove_adj_dups(single) == [1]); 133 | int[] pair = [1, 1]; 134 | assert(remove_adj_dups(pair) == [1]); 135 | int[] few = [1, 1, 5, 6, 6, 9]; 136 | assert(remove_adj_dups(few) == [1, 5, 6, 9]); 137 | assert(remove_adj_dups([1, 1, 5, 6, 6, 6, 6, 7, 8, 9, 9, 9, 9, 9]) == [1, 5, 
6, 7, 8, 9]); 138 | mixin DummyClass; 139 | Dummy[] dfew = [new Dummy(1), new Dummy(1), new Dummy(5), new Dummy(6), new Dummy(6), new Dummy(9)]; 140 | assert(remove_adj_dups(dfew) == [new Dummy(1), new Dummy(5), new Dummy(6), new Dummy(9)]); 141 | } 142 | 143 | struct BinarySearchResult { 144 | bool found; // whether the item was found 145 | size_t index; // location of item if found else "insert before" point 146 | } 147 | 148 | BinarySearchResult binary_search(T)(in T[] list, in T item) 149 | in { 150 | assert(is_ordered_no_dups(list)); 151 | } 152 | out (result) { 153 | if (result.found) { 154 | static if (is(T == class)) { // WORKAROUND: class opCmp() design flaw 155 | assert(cast(T) list[result.index] == cast(T) item); 156 | } else { 157 | assert(list[result.index] == item); 158 | } 159 | } else { 160 | assert(!list.contains(item)); 161 | static if (is(T == class)) { // WORKAROUND: class opCmp() design flaw 162 | assert(result.index == list.length || cast (T) list[result.index] > cast (T) item); 163 | assert(result.index == 0 || cast (T) list[result.index - 1] < cast (T) item); 164 | } else { 165 | assert(result.index == list.length || list[result.index] > item); 166 | assert(result.index == 0 || list[result.index - 1] < item); 167 | } 168 | } 169 | } 170 | body { 171 | // unsigned array indices make this prudent or imax could go out of range 172 | static if (is(T == class)) { // WORKAROUND: class opCmp() design flaw 173 | if (list.length == 0 || cast (T) item < cast (T) list[0]) return BinarySearchResult(false, 0); 174 | } else { 175 | if (list.length == 0 || item < list[0]) return BinarySearchResult(false, 0); 176 | } 177 | auto imax = list.length - 1; 178 | typeof(imax) imin = 0; 179 | 180 | while (imax >= imin) { 181 | typeof(imax) imid = (imin + imax) / 2; 182 | static if (is(T == class)) { // WORKAROUND: class opCmp() design flaw 183 | if (cast (T) list[imid] < cast (T) item) { 184 | imin = imid + 1; 185 | } else if (cast (T) list[imid] > cast (T) item) { 186 | imax = imid - 1; 187 | } else { 188 | return BinarySearchResult(true, imid); 189 | } 190 | } else { 191 | if (list[imid] < item) { 192 | imin = imid + 1; 193 | } else if (list[imid] > item) { 194 | imax = imid - 1; 195 | } else { 196 | return BinarySearchResult(true, imid); 197 | } 198 | } 199 | } 200 | assert(imin >= imax); 201 | return BinarySearchResult(false, imin); 202 | } 203 | unittest { 204 | assert(binary_search!int([], 5) == BinarySearchResult(false, 0)); 205 | assert(binary_search!int([5], 5) == BinarySearchResult(true, 0)); 206 | assert(binary_search!int([5], 6) == BinarySearchResult(false, 1)); 207 | assert(binary_search!int([5], 4) == BinarySearchResult(false, 0)); 208 | auto testlist = [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23]; 209 | for (auto i = 0; i < testlist.length; i++) { 210 | assert(binary_search(testlist, testlist[i]) == BinarySearchResult(true, i)); 211 | assert(binary_search(testlist, testlist[i] - 1) == BinarySearchResult(false, i)); 212 | assert(binary_search(testlist, testlist[i] + 1) == BinarySearchResult(false, i + 1)); 213 | } 214 | mixin DummyClass; 215 | auto ctestlist = [new Dummy(1), new Dummy(3), new Dummy(5), new Dummy(7), new Dummy(9), new Dummy(11), new Dummy(13), new Dummy(15), new Dummy(17), new Dummy(19), new Dummy(21), new Dummy(23)]; 216 | for (auto i = 0; i < ctestlist.length; i++) { 217 | assert(binary_search(ctestlist, new Dummy(ctestlist[i].val)) == BinarySearchResult(true, i)); 218 | assert(binary_search(ctestlist, new Dummy(ctestlist[i].val - 1)) == 
BinarySearchResult(false, i)); 219 | assert(binary_search(ctestlist, new Dummy(ctestlist[i].val + 1)) == BinarySearchResult(false, i + 1)); 220 | } 221 | } 222 | 223 | private T[] to_ordered_no_dups(T)(in T[] list...) 224 | out (result) { 225 | assert(is_ordered_no_dups(result)); 226 | foreach (item; list) assert(result.contains(item)); 227 | } 228 | body { 229 | static if (is(T == class)) { // WORKAROUND: class opCmp() design flaw 230 | auto list_dup = (cast(T[]) list).dup; 231 | } else { 232 | auto list_dup = list.dup; 233 | } 234 | return list_dup.wa_sort.remove_adj_dups();; 235 | } 236 | unittest { 237 | int[] empty; 238 | assert(to_ordered_no_dups(empty) == []); 239 | int[] single = [1]; 240 | assert(to_ordered_no_dups(single) == [1]); 241 | int[] pair = [1, 1]; 242 | assert(to_ordered_no_dups(pair) == [1]); 243 | int[] few = [5, 1, 1, 5, 6, 6, 3]; 244 | assert(to_ordered_no_dups(few) == [1, 3, 5, 6]); 245 | assert(to_ordered_no_dups(9, 5, 1, 1, 5, 6, 6, 6, 3, 1, 7, 9) == [1, 3, 5, 6, 7, 9]); 246 | mixin DummyClass; 247 | Dummy[] dfew = [new Dummy(5), new Dummy(1), new Dummy(1), new Dummy(5), new Dummy(6), new Dummy(6), new Dummy(3)]; 248 | assert(to_ordered_no_dups(dfew) == [new Dummy(1), new Dummy(3), new Dummy(5), new Dummy(6)]); 249 | } 250 | 251 | private ref T[] insert(T)(ref T[] list, in T item) 252 | in { 253 | assert(is_ordered_no_dups(list)); 254 | } 255 | out (result) { 256 | assert(is_ordered_no_dups(result)); 257 | assert(result.contains(item)); 258 | foreach (i; list) assert(result.contains(i)); 259 | } 260 | body { 261 | auto bsr = binary_search(list, item); 262 | if (!bsr.found) { 263 | static if (!isAssignable!(T, const(T))) { 264 | list ~= cast(T) item; 265 | } else { 266 | list ~= item; 267 | } 268 | if (list.length > 1 && bsr.index < list.length - 1) { 269 | copy(retro(list[bsr.index .. $ - 1]), retro(list[bsr.index + 1 .. 
$])); 270 | static if (!isAssignable!(T, const(T))) { 271 | list[bsr.index] = cast(T) item; 272 | } else { 273 | list[bsr.index] = item; 274 | } 275 | } 276 | } 277 | return list; 278 | } 279 | unittest { 280 | auto list = [2, 4, 8, 16, 32]; 281 | assert(insert(list, 1) == [1, 2, 4, 8, 16, 32]); 282 | assert(insert(list, 1) == [1, 2, 4, 8, 16, 32]); 283 | assert(insert(list, 64) == [1, 2, 4, 8, 16, 32, 64]); 284 | assert(insert(list, 64) == [1, 2, 4, 8, 16, 32, 64]); 285 | assert(insert(list, 3) == [1, 2, 3, 4, 8, 16, 32, 64]); 286 | assert(insert(list, 7) == [1, 2, 3, 4, 7, 8, 16, 32, 64]); 287 | assert(insert(list, 21) == [1, 2, 3, 4, 7, 8, 16, 21, 32, 64]); 288 | assert(insert(list, 64) == [1, 2, 3, 4, 7, 8, 16, 21, 32, 64]); 289 | assert(insert(list, 1) == [1, 2, 3, 4, 7, 8, 16, 21, 32, 64]); 290 | foreach (item; [1, 2, 3, 4, 7, 8, 16, 21, 32, 64]) assert(insert(list, item) == [1, 2, 3, 4, 7, 8, 16, 21, 32, 64]); 291 | mixin DummyClass; 292 | Dummy[] dlist = [new Dummy(2), new Dummy(4), new Dummy(8), new Dummy(16), new Dummy(32)]; 293 | assert(insert(dlist, new Dummy(1)).length == 6); 294 | } 295 | 296 | private ref T[] remove(T)(ref T[] list, in T item) 297 | in { 298 | assert(is_ordered_no_dups(list)); 299 | } 300 | out (result) { 301 | assert(is_ordered_no_dups(result)); 302 | assert(!result.contains(item)); 303 | foreach (i; list) if (i != item) assert(result.contains(i)); 304 | } 305 | body { 306 | auto bsr = binary_search(list, item); 307 | if (bsr.found) { 308 | copy(list[bsr.index + 1..$], list[bsr.index..$ - 1]); 309 | list.length--; 310 | } 311 | return list; 312 | } 313 | unittest { 314 | auto list = [1, 2, 3, 4, 7, 8, 16, 21, 32, 64]; 315 | assert(remove(list, 1) == [2, 3, 4, 7, 8, 16, 21, 32, 64]); 316 | assert(remove(list, 64) == [2, 3, 4, 7, 8, 16, 21, 32]); 317 | assert(remove(list, 3) == [2, 4, 7, 8, 16, 21, 32]); 318 | assert(remove(list, 7) == [2, 4, 8, 16, 21, 32]); 319 | assert(remove(list, 21) == [2, 4, 8, 16, 32]); 320 | mixin DummyClass; 321 | Dummy[] dlist = [new Dummy(2), new Dummy(4), new Dummy(8), new Dummy(16), new Dummy(32)]; 322 | assert(remove(dlist, dlist[0]).length == 4); 323 | } 324 | 325 | private T[] set_union(T)(in T[] list1, in T[] list2) 326 | in { 327 | assert(is_ordered_no_dups(list1) && is_ordered_no_dups(list2)); 328 | } 329 | out (result) { 330 | assert(is_ordered_no_dups(result)); 331 | foreach (i; list1) assert(result.contains(i)); 332 | foreach (i; list2) assert(result.contains(i)); 333 | foreach (i; result) assert(list1.contains(i) || list2.contains(i)); 334 | } 335 | body { 336 | T[] su; 337 | su.reserve(list1.length + list2.length); 338 | size_t i_1, i_2; 339 | while (i_1 < list1.length && i_2 < list2.length) { 340 | if (cast(T) list1[i_1] < cast(T) list2[i_2]) { // WORKAROUND: class opCmp() design flaw 341 | static if (isAssignable!(T, const(T))) { 342 | su ~= list1[i_1++]; 343 | } else { 344 | su ~= cast(T) list1[i_1++]; 345 | } 346 | } else if (cast(T) list2[i_2] < cast(T) list1[i_1]) { // WORKAROUND: class opCmp() design flaw 347 | static if (isAssignable!(T, const(T))) { 348 | su ~= list2[i_2++]; 349 | } else { 350 | su ~= cast(T) list2[i_2++]; 351 | } 352 | } else { 353 | static if (isAssignable!(T, const(T))) { 354 | su ~= list1[i_1++]; 355 | } else { 356 | su ~= cast(T) list1[i_1++]; 357 | } 358 | i_2++; 359 | } 360 | } 361 | // Add the (one or less) tail if any 362 | if (i_1 < list1.length) { 363 | static if (isAssignable!(T, const(T))) { 364 | su ~= list1[i_1..$]; 365 | } else { 366 | su ~= cast(T[]) list1[i_1..$]; 367 | } 
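        // (Reaching this branch means the merge loop exhausted list2 first,
        // so the remainder of list1 is the entire tail.)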
368 | } else if (i_2 < list2.length) { 369 | static if (isAssignable!(T, const(T))) { 370 | su ~= list2[i_2..$]; 371 | } else { 372 | su ~= cast(T[]) list2[i_2..$]; 373 | } 374 | } 375 | return su; 376 | } 377 | unittest { 378 | auto list1 = [2, 7, 8, 16, 21, 32, 64]; 379 | auto list2 = [1, 2, 3, 4, 7, 21, 64, 128]; 380 | assert(set_union(list1, list2) == [1, 2, 3, 4, 7, 8, 16, 21, 32, 64, 128]); 381 | assert(set_union(list2, list1) == [1, 2, 3, 4, 7, 8, 16, 21, 32, 64, 128]); 382 | mixin DummyClass; 383 | auto dlist1 = [new Dummy(2), new Dummy(7), new Dummy(8), new Dummy(16), new Dummy(21), new Dummy(32), new Dummy(64)]; 384 | auto dlist2 = [new Dummy(1), new Dummy(2), new Dummy(3), new Dummy(4), new Dummy(7), new Dummy(21), new Dummy(64), new Dummy(128)]; 385 | assert(set_union(dlist1, dlist2) == set_union(dlist2, dlist1)); 386 | } 387 | 388 | private T[] set_intersection(T)(in T[] list1, in T[] list2) 389 | in { 390 | assert(is_ordered_no_dups(list1) && is_ordered_no_dups(list2)); 391 | } 392 | out (result) { 393 | assert(is_ordered_no_dups(result)); 394 | foreach (i; list1) if (list2.contains(i)) assert(result.contains(i)); 395 | foreach (i; list2) if (list1.contains(i)) assert(result.contains(i)); 396 | foreach (i; result) assert(list1.contains(i) && list2.contains(i)); 397 | } 398 | body { 399 | T[] su; 400 | su.reserve(list1.length < list2.length ? list1.length : list2.length); 401 | size_t i_1, i_2; 402 | while (i_1 < list1.length && i_2 < list2.length) { 403 | if (cast(T) list1[i_1] < cast(T) list2[i_2]) { // WORKAROUND: class opCmp() design flaw 404 | i_1++; 405 | } else if (cast(T) list2[i_2] < cast(T) list1[i_1]) { // WORKAROUND: class opCmp() design flaw 406 | i_2++; 407 | } else { 408 | static if (isAssignable!(T, const(T))) { 409 | su ~= list1[i_1++]; 410 | } else { 411 | su ~= cast(T) list1[i_1++]; 412 | } 413 | i_2++; 414 | } 415 | } 416 | return su; 417 | } 418 | unittest { 419 | auto list1 = [2, 7, 8, 16, 21, 32, 64]; 420 | auto list2 = [1, 2, 3, 4, 7, 21, 64, 128]; 421 | assert(set_intersection(list1, list2) == [2, 7, 21, 64]); 422 | assert(set_intersection(list2, list1) == [2, 7, 21, 64]); 423 | mixin DummyClass; 424 | auto dlist1 = [new Dummy(2), new Dummy(7), new Dummy(8), new Dummy(16), new Dummy(21), new Dummy(32), new Dummy(64)]; 425 | auto dlist2 = [new Dummy(1), new Dummy(2), new Dummy(3), new Dummy(4), new Dummy(7), new Dummy(21), new Dummy(64), new Dummy(128)]; 426 | assert(set_intersection(dlist1, dlist2) == set_intersection(dlist2, dlist1)); 427 | } 428 | 429 | private T[] symmetric_set_difference(T)(in T[] list1, in T[] list2) 430 | in { 431 | assert(is_ordered_no_dups(list1) && is_ordered_no_dups(list2)); 432 | } 433 | out (result) { 434 | assert(is_ordered_no_dups(result)); 435 | foreach (i; list1) if (!list2.contains(i)) assert(result.contains(i)); 436 | foreach (i; list2) if (!list1.contains(i)) assert(result.contains(i)); 437 | foreach (i; result) assert((list1.contains(i) && !list2.contains(i)) || (list2.contains(i) && !list1.contains(i))); 438 | } 439 | body { 440 | T[] su; 441 | su.reserve(list1.length + list2.length); 442 | size_t i_1, i_2; 443 | while (i_1 < list1.length && i_2 < list2.length) { 444 | if (cast(T) list1[i_1] < cast(T) list2[i_2]) { // WORKAROUND: class opCmp() design flaw 445 | static if (isAssignable!(T, const(T))) { 446 | su ~= list1[i_1++]; 447 | } else { 448 | su ~= cast(T) list1[i_1++]; 449 | } 450 | } else if (cast(T) list2[i_2] < cast(T) list1[i_1]) { // WORKAROUND: class opCmp() design flaw 451 | static if (isAssignable!(T, 
const(T))) { 452 | su ~= list2[i_2++]; 453 | } else { 454 | su ~= cast(T) list2[i_2++]; 455 | } 456 | } else { 457 | i_1++; 458 | i_2++; 459 | } 460 | } 461 | // Add the (one or less) tail if any 462 | if (i_1 < list1.length) { 463 | static if (isAssignable!(T, const(T))) { 464 | su ~= list1[i_1..$]; 465 | } else { 466 | su ~= cast(T[]) list1[i_1..$]; 467 | } 468 | } else if (i_2 < list2.length) { 469 | static if (isAssignable!(T, const(T))) { 470 | su ~= list2[i_2..$]; 471 | } else { 472 | su ~= cast(T[]) list2[i_2..$]; 473 | } 474 | } 475 | return su; 476 | } 477 | unittest { 478 | auto list1 = [2, 7, 8, 16, 21, 32, 64]; 479 | auto list2 = [1, 2, 3, 4, 7, 21, 64, 128]; 480 | assert(symmetric_set_difference(list1, list2) == [1, 3, 4, 8, 16, 32, 128]); 481 | assert(symmetric_set_difference(list2, list1) == [1, 3, 4, 8, 16, 32, 128]); 482 | mixin DummyClass; 483 | auto dlist1 = [new Dummy(2), new Dummy(7), new Dummy(8), new Dummy(16), new Dummy(21), new Dummy(32), new Dummy(64)]; 484 | auto dlist2 = [new Dummy(1), new Dummy(2), new Dummy(3), new Dummy(4), new Dummy(7), new Dummy(21), new Dummy(64), new Dummy(128)]; 485 | assert(symmetric_set_difference(dlist1, dlist2) == symmetric_set_difference(dlist2, dlist1)); 486 | } 487 | 488 | private T[] set_difference(T)(in T[] list1, in T[] list2) 489 | in { 490 | assert(is_ordered_no_dups(list1) && is_ordered_no_dups(list2)); 491 | } 492 | out (result) { 493 | assert(is_ordered_no_dups(result)); 494 | foreach (i; list1) if (!list2.contains(i)) assert(result.contains(i)); 495 | foreach (i; list2) assert(!result.contains(i)); 496 | foreach (i; result) assert(list1.contains(i) && !list2.contains(i)); 497 | } 498 | body { 499 | T[] su; 500 | su.reserve(list1.length + list2.length); 501 | size_t i_1, i_2; 502 | while (i_1 < list1.length && i_2 < list2.length) { 503 | if (cast(T) list1[i_1] < cast(T) list2[i_2]) { // WORKAROUND: class opCmp() design flaw 504 | static if (isAssignable!(T, const(T))) { 505 | su ~= list1[i_1++]; 506 | } else { 507 | su ~= cast(T) list1[i_1++]; 508 | } 509 | } else if (cast(T) list2[i_2] < cast(T) list1[i_1]) { // WORKAROUND: class opCmp() design flaw 510 | i_2++; 511 | } else { 512 | i_1++; 513 | i_2++; 514 | } 515 | } 516 | // Add the (one or less) tail if any 517 | if (i_1 < list1.length) { 518 | static if (isAssignable!(T, const(T))) { 519 | su ~= list1[i_1..$]; 520 | } else { 521 | su ~= cast(T[]) list1[i_1..$]; 522 | } 523 | } 524 | return su; 525 | } 526 | unittest { 527 | auto list1 = [2, 7, 8, 16, 21, 32, 64]; 528 | auto list2 = [1, 2, 3, 4, 7, 21, 64, 128]; 529 | assert(set_difference(list1, list2) == [8, 16, 32]); 530 | assert(set_difference(list2, list1) == [1, 3, 4, 128]); 531 | mixin DummyClass; 532 | auto dlist1 = [new Dummy(2), new Dummy(7), new Dummy(8), new Dummy(16), new Dummy(21), new Dummy(32), new Dummy(64)]; 533 | auto dlist2 = [new Dummy(1), new Dummy(2), new Dummy(3), new Dummy(4), new Dummy(7), new Dummy(21), new Dummy(64), new Dummy(128)]; 534 | assert(disjoint(set_difference(dlist1, dlist2), set_difference(dlist2, dlist1))); 535 | } 536 | 537 | private bool disjoint(T)(in T[] list1, in T[] list2) 538 | in { 539 | assert(is_ordered_no_dups(list1) && is_ordered_no_dups(list2)); 540 | } 541 | out (result) { 542 | auto count = 0; 543 | foreach (i; list1) if (list2.contains(i)) count++; 544 | foreach (i; list2) if (list1.contains(i)) count++; 545 | assert(result == (count == 0)); 546 | } 547 | body { 548 | size_t i_1, i_2; 549 | while (i_1 < list1.length && i_2 < list2.length) { 550 | if (cast(T) 
list1[i_1] < cast(T) list2[i_2]) { // WORKAROUND: class opCmp() design flaw 551 | i_1++; 552 | } else if (cast(T) list2[i_2] < cast(T) list1[i_1]) { // WORKAROUND: class opCmp() design flaw 553 | i_2++; 554 | } else { 555 | return false; 556 | } 557 | } 558 | return true; 559 | } 560 | unittest { 561 | auto list1 = [2, 7, 8, 16, 21, 32, 64]; 562 | auto list2 = [1, 2, 3, 4, 7, 21, 64, 128]; 563 | assert(!disjoint(list1, list2)); 564 | assert(!disjoint(list2, list1)); 565 | assert(disjoint([8, 16, 32], [1, 3, 4, 128])); 566 | mixin DummyClass; 567 | auto dlist1 = [new Dummy(2), new Dummy(7), new Dummy(8), new Dummy(16), new Dummy(21), new Dummy(32), new Dummy(64)]; 568 | auto dlist2 = [new Dummy(1), new Dummy(2), new Dummy(3), new Dummy(4), new Dummy(7), new Dummy(21), new Dummy(64), new Dummy(128)]; 569 | assert(disjoint(dlist1, dlist2) == disjoint(dlist2, dlist1)); 570 | } 571 | 572 | private bool contains(T)(in T[] list1, in T[] list2) 573 | in { 574 | assert(is_ordered_no_dups(list1) && is_ordered_no_dups(list2)); 575 | } 576 | out (result) { 577 | auto count = 0; 578 | foreach (i; list2) if (list1.contains(i)) count++; 579 | assert(result == (count == list2.length)); 580 | } 581 | body { 582 | size_t i_1, i_2; 583 | while (i_1 < list1.length && i_2 < list2.length) { 584 | if (cast(T) list1[i_1] < cast(T) list2[i_2]) { // WORKAROUND: class opCmp() design flaw 585 | i_1++; 586 | } else if (cast(T) list2[i_2] < cast(T) list1[i_1]) { // WORKAROUND: class opCmp() design flaw 587 | return false; 588 | } else { 589 | i_1++; 590 | i_2++; 591 | } 592 | } 593 | return i_2 == list2.length; 594 | } 595 | unittest { 596 | auto list1 = [2, 7, 8, 16, 21, 32, 64]; 597 | auto list2 = [1, 2, 3, 4, 7, 21, 64, 128]; 598 | auto list3 = set_difference(list1, list2); 599 | auto list4 = set_difference(list2, list3); 600 | assert(list1.contains(list1)); 601 | assert(!list1.contains(list2)); 602 | assert(list1.contains(list3)); 603 | assert(!list1.contains(list4)); 604 | assert(list2.contains(list2)); 605 | assert(!list2.contains(list1)); 606 | assert(!list2.contains(list3)); 607 | assert(list2.contains(list4)); 608 | mixin DummyClass; 609 | auto dlist1 = [new Dummy(2), new Dummy(7), new Dummy(8), new Dummy(16), new Dummy(21), new Dummy(32), new Dummy(64)]; 610 | auto dlist2 = [new Dummy(1), new Dummy(2), new Dummy(3), new Dummy(4), new Dummy(7), new Dummy(21), new Dummy(64), new Dummy(128)]; 611 | assert(contains(dlist1, dlist2) == contains(dlist2, dlist1)); 612 | } 613 | 614 | private string to_string(T)(const T[] list) 615 | { 616 | if (list.length == 0) return "[]"; 617 | string str = format("[%s", cast(T)list[0]); 618 | foreach (element; list[1..$]) { 619 | str ~= format(", %s", cast(T)element); 620 | } 621 | str ~= "]"; 622 | return str; 623 | } 624 | 625 | private bool distinct_lists(T)(const T[] list1, const T[] list2) 626 | { 627 | return list1 is null || list1 !is list2; 628 | } 629 | unittest { 630 | assert(distinct_lists([1, 2, 3], [1, 2, 3])); 631 | auto list = [1, 2, 3, 4, 5]; 632 | auto list_d = list; 633 | assert(!distinct_lists(list, list_d)); 634 | assert(distinct_lists(list, list.dup)); 635 | int[] list_e; 636 | assert(distinct_lists(list_e, list_e)); 637 | auto list_ed = list_e.dup; 638 | list_e ~= 2; 639 | list_ed ~= 2; 640 | assert(distinct_lists(list_e, list_ed)); 641 | } 642 | 643 | struct Set(T) { 644 | protected T[] _elements; 645 | invariant () { 646 | assert(is_ordered_no_dups(_elements), _elements.to_string()); 647 | } 648 | 649 | this(in T[] initial_items...) 
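    // The argument list may be unordered and may contain duplicates; the
    // constructor normalises it into the sorted, duplicate-free form the
    // invariant demands, e.g. Set!int(3, 4, 5, 2, 8, 17, 5, 4) holds
    // [2, 3, 4, 5, 8, 17] (see the unittest below).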
650 | out { 651 | assert(_elements.length <= initial_items.length); 652 | foreach (item; initial_items) assert(_elements.contains(item)); 653 | } 654 | body { 655 | static if (!isAssignable!(T, const(T))) { 656 | _elements = (cast(T[]) initial_items).dup.wa_sort.remove_adj_dups(); 657 | } else { 658 | _elements = initial_items.dup.wa_sort.remove_adj_dups(); 659 | } 660 | } 661 | 662 | @property 663 | size_t cardinality() const 664 | { 665 | return _elements.length; 666 | } 667 | 668 | @property 669 | T[] elements() const 670 | out (result) { 671 | assert(result == _elements, _elements.to_string()); 672 | assert(distinct_lists(result, _elements), _elements.to_string()); 673 | } 674 | body { 675 | static if (!isAssignable!(T, const(T))) { 676 | return (cast(T[]) _elements).dup; 677 | } else { 678 | return _elements.dup; 679 | } 680 | } 681 | 682 | Set clone() const 683 | out (result) { 684 | assert(distinct_lists(result._elements, _elements)); 685 | assert(result._elements == _elements); 686 | assert(result !is this || this is Set()); 687 | } 688 | body { 689 | auto clone_set = Set(); 690 | static if (!isAssignable!(T, const(T))) { 691 | clone_set._elements = (cast(T[])_elements).dup; 692 | } else { 693 | clone_set._elements = _elements.dup; 694 | } 695 | return clone_set; 696 | } 697 | 698 | bool opEquals(in Set other_set) const 699 | { 700 | return _elements == other_set._elements; 701 | } 702 | 703 | bool opBinaryRight(string op)(in T putative_member) const if (op == "in") 704 | out (result) { 705 | assert(result ? _elements.contains(putative_member) : !_elements.contains(putative_member)); 706 | } 707 | body { 708 | return _elements.binary_search(putative_member).found; 709 | } 710 | 711 | bool is_disjoint_from(in Set other_set) const 712 | body { 713 | return _elements.disjoint(other_set._elements); 714 | } 715 | 716 | bool is_proper_subset_of(in Set other_set) const 717 | body { 718 | return _elements.length < other_set._elements.length && other_set._elements.contains(_elements); 719 | } 720 | 721 | bool is_proper_superset_of(in Set other_set) const 722 | body { 723 | return _elements.length > other_set._elements.length && _elements.contains(other_set._elements); 724 | } 725 | 726 | bool is_subset_of(in Set other_set) const 727 | body { 728 | return other_set._elements.contains(_elements); 729 | } 730 | 731 | bool is_superset_of(in Set other_set) const 732 | body { 733 | return _elements.contains(other_set._elements); 734 | } 735 | 736 | bool contains(in T[] putative_members ...) const 737 | { 738 | foreach (putative_member; putative_members) { 739 | if (!_elements.binary_search(putative_member).found) 740 | return false; 741 | } 742 | return true; 743 | } 744 | 745 | ref Set opOpAssign(string op)(in T new_item) if (op == "+" || op == "-") 746 | out { 747 | static if (op == "+") { 748 | assert(_elements.contains(new_item)); 749 | } else if (op == "-") { 750 | assert(!_elements.contains(new_item)); 751 | } else { 752 | assert(false); 753 | } 754 | } 755 | body { 756 | static if (op == "+") { 757 | _elements.insert(new_item); 758 | } else if (op == "-") { 759 | _elements.remove(new_item); 760 | } else { 761 | assert(false); 762 | } 763 | return this; 764 | } 765 | 766 | void add(in T[] new_items...) 767 | out { 768 | foreach (new_item; new_items) assert(_elements.contains(new_item)); 769 | } 770 | body { 771 | foreach(new_item; new_items) { 772 | _elements.insert(new_item); 773 | } 774 | } 775 | 776 | void remove(in T[] new_items...) 
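    // Remove each of the given items; absent items are silently ignored
    // (the underlying list remove() is a no-op when its binary search fails).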
777 | out { 778 | foreach (new_item; new_items) assert(!_elements.contains(new_item)); 779 | } 780 | body { 781 | foreach(new_item; new_items) { 782 | _elements.remove(new_item); 783 | } 784 | } 785 | 786 | Set opBinary(string op)(in Set other_set) const if (op == "|" || op == "&" || op == "-" || op == "^") 787 | out (result) { 788 | foreach (set; [this, other_set]) { 789 | assert(result !is set || set is Set(), format("%s %s %s %s %s", op, set, this, other_set, result)); 790 | assert(distinct_lists(result._elements, set._elements)); 791 | } 792 | static if (op == "|") { 793 | foreach (item; result._elements) assert(this._elements.contains(item) || other_set._elements.contains(item)); 794 | foreach (set; [this, other_set]) { 795 | foreach (item; set._elements) assert(result._elements.contains(item)); 796 | } 797 | } else if (op == "&") { 798 | foreach (item; result._elements) assert(this._elements.contains(item) && other_set._elements.contains(item)); 799 | foreach (item; _elements) assert(result._elements.contains(item) || !other_set._elements.contains(item)); 800 | foreach (item; other_set._elements) assert(result._elements.contains(item) || !_elements.contains(item)); 801 | } else if (op == "-") { 802 | foreach (item; result._elements) assert(this._elements.contains(item) && !other_set._elements.contains(item)); 803 | foreach (item; _elements) assert(result._elements.contains(item) ? !other_set._elements.contains(item) : other_set._elements.contains(item)); 804 | } else if (op == "^") { 805 | foreach (item; result._elements) assert(this._elements.contains(item) ? !other_set._elements.contains(item) : other_set._elements.contains(item)); 806 | foreach (item; _elements) assert(result._elements.contains(item) ? !other_set._elements.contains(item) : other_set._elements.contains(item)); 807 | foreach (item; other_set._elements) assert(result._elements.contains(item) ? 
!_elements.contains(item) : _elements.contains(item)); 808 | } else { 809 | assert(false); 810 | } 811 | } 812 | body { 813 | auto new_set = Set(); 814 | static if (op == "|") { 815 | new_set._elements = _elements.set_union(other_set._elements); 816 | } else if (op == "&") { 817 | new_set._elements = _elements.set_intersection(other_set._elements); 818 | } else if (op == "-") { 819 | new_set._elements = _elements.set_difference(other_set._elements); 820 | } else if (op == "^") { 821 | new_set._elements = _elements.symmetric_set_difference(other_set._elements); 822 | } else { 823 | assert(false); 824 | } 825 | return new_set; 826 | } 827 | 828 | ref Set opOpAssign(string op)(in Set other_set) if (op == "|" || op == "-" || op == "^" || op == "&") 829 | out (result) { 830 | assert(result is this); 831 | // NB: without access to "before" state we can't do reasonable tests 832 | } 833 | body { 834 | static if (op == "|") { 835 | _elements = _elements.set_union(other_set._elements); 836 | } else if (op == "&") { 837 | _elements = _elements.set_intersection(other_set._elements); 838 | } else if (op == "-") { 839 | _elements = _elements.set_difference(other_set._elements); 840 | } else if (op == "^") { 841 | _elements = _elements.symmetric_set_difference(other_set._elements); 842 | } else { 843 | assert(false); 844 | } 845 | return this; 846 | } 847 | 848 | int opApply(int delegate(T) dg) 849 | out { 850 | assert(is_ordered_no_dups(_elements), _elements.to_string()); 851 | } 852 | body { 853 | foreach (element; _elements) { 854 | auto result = dg(element); 855 | if (result) return result; 856 | } 857 | return 0; 858 | } 859 | 860 | string toString() const 861 | body { 862 | if (_elements.length == 0) return "Set{}"; 863 | static if (!isAssignable!(T, const(T))) { 864 | auto str = format("Set{%s", cast(T) _elements[0]); 865 | foreach (element; _elements[1..$]) { 866 | str ~= format(", %s", cast(T) element); 867 | } 868 | } else { 869 | auto str = format("Set{%s", _elements[0]); 870 | foreach (element; _elements[1..$]) { 871 | str ~= format(", %s", element); 872 | } 873 | } 874 | str ~= "}"; 875 | return str; 876 | } 877 | } 878 | unittest { 879 | import std.random; 880 | auto int_set = Set!int(); 881 | assert(int_set.cardinality == 0); 882 | int_set.add(3, 4, 5, 2, 8, 17, 5, 4); 883 | assert(int_set.elements == [2, 3, 4, 5, 8, 17]); 884 | assert(Set!int(3, 4, 5, 2, 8, 17, 5, 4).cardinality == 6); 885 | assert(Set!int(3, 4, 5, 2, 8, 17, 5, 4).elements == [2, 3, 4, 5, 8, 17]); 886 | assert(8 in Set!int(3, 4, 5, 2, 8, 17, 5, 4)); 887 | assert(8 !in (Set!int(3, 4, 5, 2, 8, 17, 5, 4) -= 8)); 888 | assert(11 !in Set!int(3, 4, 5, 2, 8, 17, 5, 4)); 889 | assert(11 in (Set!int(3, 4, 5, 2, 8, 17, 5, 4) += 11)); 890 | assert((Set!int(3, 9, 4, 5, 3) | Set!int(1, 3, 5, 7)).elements == [1, 3, 4, 5, 7, 9]); 891 | assert((Set!int(3, 9, 4, 5, 3) & Set!int(1, 3, 5, 7)).elements == [3, 5]); 892 | assert((Set!int(3, 9, 4, 5, 3) - Set!int(1, 3, 5, 7)).elements == [4, 9]); 893 | assert((Set!int(3, 9, 4, 5, 3) ^ Set!int(1, 3, 5, 7)).elements == [1, 4, 7, 9], (Set!int(3, 9, 4, 5, 3) ^ Set!int(1, 3, 5, 7)).toString()); 894 | assert((Set!int(3, 9, 4, 5, 3) |= Set!int(1, 3, 5, 7)).elements == [1, 3, 4, 5, 7, 9]); 895 | assert((Set!int(3, 9, 4, 5, 3) &= Set!int(1, 3, 5, 7)).elements == [3, 5]); 896 | assert((Set!int(3, 9, 4, 5, 3) -= Set!int(1, 3, 5, 7)).elements == [4, 9]); 897 | assert((Set!int(3, 9, 4, 5, 3) ^= Set!int(1, 3, 5, 7)).elements == [1, 4, 7, 9], (Set!int(3, 9, 4, 5, 3) ^ Set!int(1, 3, 5, 7)).toString()); 898 | 
mixin DummyClass; 899 | auto dummy_set = Set!Dummy(); 900 | assert(dummy_set.cardinality == 0); 901 | dummy_set.add(list_of_dummies(3, 4, 5, 2, 8, 17, 5, 4)); 902 | assert(dummy_set.elements == list_of_dummies(2, 3, 4, 5, 8, 17)); 903 | assert(Set!Dummy(list_of_dummies(3, 4, 5, 2, 8, 17, 5, 4)).cardinality == 6); 904 | assert(Set!Dummy(list_of_dummies(3, 4, 5, 2, 8, 17, 5, 4)).elements.length == 6); 905 | assert(new Dummy(8) in Set!Dummy(list_of_dummies(3, 4, 5, 2, 8, 17, 5, 4))); 906 | assert(new Dummy(11) in (Set!Dummy(list_of_dummies(3, 4, 5, 2, 8, 17, 5, 4)) += new Dummy(11))); 907 | assert(dummy_set.cardinality == 6); 908 | assert((Set!Dummy(list_of_dummies(3, 9, 4, 5, 3)) | Set!Dummy(list_of_dummies(1, 3, 5, 7))).elements == list_of_dummies(1, 3, 4, 5, 7, 9)); 909 | int [] random_list(int len, int min_val, int max_val) 910 | body { 911 | int[] list; 912 | for (auto i = 0; i < len; i++) list ~= uniform(min_val, max_val); 913 | return list; 914 | } 915 | for (auto i = 0; i < 100; i++) { 916 | auto set_i = Set!int(); 917 | set_i.add(random_list(16, 0, 24)); 918 | set_i.remove(random_list(16, 0, 24)); 919 | auto set_u = Set!int(random_list(i, 0, 24)) | Set!int(random_list(i + 2, 0, 24)); 920 | auto set_d = Set!int(random_list(i, 0, 24)) - Set!int(random_list(i + 2, 0, 24)); 921 | auto set_a = Set!int(random_list(i, 0, 24)) & Set!int(random_list(i + 2, 0, 24)); 922 | auto set_sd = Set!int(random_list(i, 0, 24)) ^ Set!int(random_list(i + 2, 0, 24)); 923 | set_u |= Set!int(random_list(i, 0, 24)); 924 | set_u ^= Set!int(random_list(i, 0, 24)); 925 | set_u &= Set!int(random_list(i, 0, 24)); 926 | set_u -= Set!int(random_list(i, 0, 24)); 927 | } 928 | } 929 | 930 | Set!T extract_key_set(T, G)(const(G[T]) assocArray) 931 | out (result) { 932 | assert(result.cardinality == assocArray.length); 933 | foreach(element; result.elements) 934 | assert(element in assocArray); 935 | } 936 | body { 937 | return Set!T(assocArray.keys); 938 | } 939 | 940 | unittest { 941 | auto test = [1: 2, 7 : 8, 3 : 4]; 942 | assert(extract_key_set(test).elements == [1, 3, 7]); 943 | } 944 | -------------------------------------------------------------------------------- /symbols.d: -------------------------------------------------------------------------------- 1 | // symbols.d 2 | // 3 | // Copyright Peter Williams 2013 . 4 | // 5 | // Distributed under the Boost Software License, Version 1.0. 
6 | // (See accompanying file LICENSE_1_0.txt or copy at 7 | // http://www.boost.org/LICENSE_1_0.txt) 8 | 9 | module symbols; 10 | 11 | import std.string; 12 | import std.regex; 13 | 14 | import sets; 15 | import idnumber; 16 | import workarounds; 17 | import workarounds: wa_sort; 18 | 19 | import ddlib.lexan; 20 | 21 | alias uint SymbolId; 22 | enum SpecialSymbols : SymbolId { start, end, invalid_token, parse_error }; 23 | 24 | enum SymbolType {token, tag, non_terminal}; 25 | 26 | enum Associativity {nonassoc, left, right}; 27 | alias uint Precedence; 28 | struct AssociativePrecedence { 29 | Associativity associativity; 30 | Precedence precedence; 31 | 32 | @property 33 | bool is_explicitly_set() 34 | { 35 | return precedence != 0; 36 | } 37 | } 38 | 39 | class FirstsData { 40 | Set!TokenSymbol tokenset; 41 | bool transparent; 42 | 43 | this(Set!TokenSymbol tokenset, bool transparent) 44 | { 45 | this.tokenset = tokenset; 46 | this.transparent = transparent; 47 | } 48 | 49 | override string toString() 50 | { 51 | return format("Firsts: %s; Transparent: %s", tokenset, transparent); 52 | } 53 | } 54 | 55 | bool is_allowable_name(string name) 56 | { 57 | return name.length < 2 || (name[0..2] != "dd" && name[0..2] != "DD"); 58 | } 59 | 60 | class Symbol { 61 | mixin IdNumber!(SymbolId); 62 | const SymbolType type; 63 | const string name; 64 | AssociativePrecedence associative_precedence; 65 | CharLocation defined_at; 66 | CharLocation[] used_at; 67 | string field_name; 68 | string pattern; 69 | FirstsData firsts_data; 70 | 71 | this(SymbolId id, string sname, SymbolType stype, CharLocation location, bool is_definition=true) 72 | { 73 | this.id = id; 74 | name = sname; 75 | type = stype; 76 | if (type == SymbolType.token) { 77 | // FIRST() for a token is trivial 78 | firsts_data = new FirstsData(Set!TokenSymbol(this), false); 79 | } else { 80 | firsts_data = null; 81 | } 82 | if (is_definition) { 83 | defined_at = location; 84 | } else { 85 | used_at ~= location; 86 | } 87 | } 88 | 89 | @property 90 | Associativity associativity() 91 | { 92 | return associative_precedence.associativity; 93 | } 94 | 95 | @property 96 | Precedence precedence() 97 | { 98 | return associative_precedence.precedence; 99 | } 100 | 101 | @property 102 | bool is_defined() 103 | { 104 | return defined_at != CharLocation(0, 0); 105 | } 106 | 107 | @property 108 | bool is_used() 109 | { 110 | return used_at.length > 0; 111 | } 112 | 113 | override string toString() 114 | { 115 | if (type == SymbolType.token && pattern.length > 0 && pattern[0] == '"') { 116 | return pattern; 117 | } 118 | return name; 119 | } 120 | } 121 | 122 | alias Symbol TokenSymbol; 123 | alias Symbol TagSymbol; 124 | alias Symbol NonTerminalSymbol; 125 | 126 | struct FieldDefinition { 127 | string field_name; 128 | string field_type; 129 | string conversion_function_name; 130 | CharLocation defined_at; 131 | } 132 | 133 | import std.stdio; 134 | 135 | struct SymbolTable { 136 | private SymbolId next_symbol_id = SpecialSymbols.max + 1; 137 | private static Symbol[SpecialSymbols.max + 1] special_symbols; 138 | private TokenSymbol[string] tokens; // indexed by token name 139 | private TokenSymbol[string] literal_tokens; // indexed by literal string 140 | private TagSymbol[string] tags; // indexed by name 141 | private NonTerminalSymbol[string] non_terminals; // indexed by name 142 | private FieldDefinition[string] field_definitions; // indexed by name 143 | private string[] skip_rule_list; 144 | private auto current_precedence = Precedence.max; 145 | 
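    // The invariant below audits id allocation: every symbol's id must lie in
    // SpecialSymbols.min .. next_symbol_id - 1 and, because ids are issued
    // sequentially, their sum must equal the arithmetic series
    // next_symbol_id * (next_symbol_id - 1) / 2. Together with the count and
    // range assertions this is a cheap (though not watertight) guard against
    // duplicated or leaked ids.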
146 | invariant () { 147 | import std.stdio; 148 | auto check_sum = 0; // Sum of arithmetic series 149 | for (auto i = SpecialSymbols.min; i <= SpecialSymbols.max; i++) { 150 | assert(special_symbols[i].id == i); 151 | check_sum += i; 152 | } 153 | assert(next_symbol_id == (SpecialSymbols.max + tokens.length + tags.length + non_terminals.length + 1)); 154 | foreach (literal; literal_tokens.keys) { 155 | auto literal_token = literal_tokens[literal]; 156 | assert(literal_token.pattern == literal); 157 | assert(tokens[literal_token.name] is literal_token); 158 | } 159 | foreach (key; tokens.byKey()) { 160 | auto token = tokens[key]; 161 | assert(token.name == key && token.type == SymbolType.token); 162 | assert(token.pattern[0] == '"' ? literal_tokens[token.pattern] is token : token.pattern !in literal_tokens); 163 | assert(token.id > SpecialSymbols.max && token.id < next_symbol_id); 164 | check_sum += token.id; 165 | } 166 | foreach (key; tags.byKey()) { 167 | auto tag = tags[key]; 168 | assert(tag.name == key && tag.type == SymbolType.tag); 169 | assert(tag.id > SpecialSymbols.max && tag.id < next_symbol_id); 170 | check_sum += tag.id; 171 | } 172 | foreach (key; non_terminals.byKey()) { 173 | auto non_terminal = non_terminals[key]; 174 | assert(non_terminal.name == key && non_terminal.type == SymbolType.non_terminal); 175 | assert(non_terminal.id > SpecialSymbols.max && non_terminal.id < next_symbol_id); 176 | check_sum += non_terminal.id; 177 | } 178 | assert(check_sum == ((next_symbol_id * (next_symbol_id - 1)) / 2)); 179 | } 180 | 181 | static this() 182 | { 183 | special_symbols[SpecialSymbols.start] = new Symbol(SpecialSymbols.start, "ddSTART", SymbolType.non_terminal, CharLocation(0, 0)); 184 | special_symbols[SpecialSymbols.end] = new Symbol(SpecialSymbols.end, "ddEND", SymbolType.token, CharLocation(0, 0)); 185 | special_symbols[SpecialSymbols.invalid_token] = new Symbol(SpecialSymbols.invalid_token, "ddINVALID_TOKEN", SymbolType.token, CharLocation(0, 0)); 186 | special_symbols[SpecialSymbols.parse_error] = new Symbol(SpecialSymbols.parse_error, "ddERROR", SymbolType.non_terminal, CharLocation(0, 0)); 187 | special_symbols[SpecialSymbols.parse_error].firsts_data = new FirstsData(Set!Symbol(), true); 188 | } 189 | 190 | TokenSymbol new_token(string new_token_name, string pattern, CharLocation location, string field_name = "") 191 | in { 192 | assert(!is_known_symbol(new_token_name)); 193 | } 194 | body { 195 | auto token = cast(TokenSymbol) new Symbol(next_symbol_id++, new_token_name, SymbolType.token, location); 196 | token.pattern = pattern; 197 | token.field_name = field_name; 198 | tokens[new_token_name] = token; 199 | if (pattern[0] == '"') { 200 | literal_tokens[pattern] = token; 201 | } 202 | return token; 203 | } 204 | 205 | TagSymbol new_tag(string new_tag_name, CharLocation location) 206 | in { 207 | assert(!is_known_symbol(new_tag_name)); 208 | } 209 | body { 210 | auto tag = cast(TagSymbol) new Symbol(next_symbol_id++, new_tag_name, SymbolType.tag, location); 211 | tags[new_tag_name] = tag; 212 | return tag; 213 | } 214 | 215 | @property 216 | size_t token_count() 217 | { 218 | return tokens.length; 219 | } 220 | 221 | @property 222 | size_t non_terminal_count() 223 | { 224 | return non_terminals.length; 225 | } 226 | 227 | bool is_known_token(string symbol_name) 228 | { 229 | return (symbol_name in tokens) !is null; 230 | } 231 | 232 | bool is_known_literal(string literal) 233 | { 234 | return (literal in literal_tokens) !is null; 235 | } 236 | 237 | bool 
is_known_tag(string symbol_name) 238 | { 239 | return (symbol_name in tags) !is null; 240 | } 241 | 242 | bool 243 | is_known_non_terminal(string symbol_name) 244 | { 245 | return (symbol_name in non_terminals) !is null; 246 | } 247 | 248 | bool is_known_symbol(string symbol_name) 249 | { 250 | return symbol_name in tokens || symbol_name in tags || symbol_name in non_terminals; 251 | } 252 | 253 | Symbol get_symbol(string symbol_name) 254 | { 255 | if (symbol_name in tokens) { 256 | return tokens[symbol_name]; 257 | } else if (symbol_name in non_terminals) { 258 | return non_terminals[symbol_name]; 259 | } else if (symbol_name in tags) { 260 | return tags[symbol_name]; 261 | } 262 | return null; 263 | } 264 | 265 | Symbol get_special_symbol(SymbolId symbol_id) 266 | { 267 | return special_symbols[symbol_id]; 268 | } 269 | 270 | Symbol get_symbol(string symbol_name, CharLocation location, bool auto_create=false) 271 | { 272 | auto symbol = get_symbol(symbol_name); 273 | if (symbol !is null) { 274 | symbol.used_at ~= location; 275 | } else if (auto_create) { 276 | // if it's referenced without being defined it's a non terminal 277 | symbol = cast(NonTerminalSymbol) new Symbol(next_symbol_id++, symbol_name, SymbolType.non_terminal, location, false); 278 | non_terminals[symbol_name] = symbol; 279 | } 280 | return symbol; 281 | } 282 | 283 | TokenSymbol get_literal_token(string literal, CharLocation location) 284 | { 285 | auto token_symbol = literal_tokens.get(literal, null); 286 | if (token_symbol !is null) { 287 | token_symbol.used_at ~= location; 288 | } 289 | return token_symbol; 290 | } 291 | 292 | CharLocation get_declaration_point(string symbol_name) 293 | in { 294 | assert(is_known_symbol(symbol_name)); 295 | } 296 | body { 297 | if (symbol_name in tokens) { 298 | return tokens[symbol_name].defined_at; 299 | } else if (symbol_name in non_terminals) { 300 | return non_terminals[symbol_name].defined_at; 301 | } else if (symbol_name in tags) { 302 | return tags[symbol_name].defined_at; 303 | } 304 | assert(0); 305 | } 306 | 307 | void set_precedences(Associativity assoc, string[] symbol_names, CharLocation location) 308 | in { 309 | foreach (symbol_name; symbol_names) { 310 | assert(!is_known_non_terminal(symbol_name)); 311 | assert(!is_known_tag(symbol_name)); 312 | } 313 | } 314 | body { 315 | foreach (symbol_name; symbol_names) { 316 | auto symbol = tokens.get(symbol_name, null); 317 | if (symbol is null) { 318 | symbol = new_tag(symbol_name, location); 319 | } 320 | symbol.associative_precedence = AssociativePrecedence(assoc, current_precedence); 321 | } 322 | current_precedence--; 323 | } 324 | 325 | void set_precedences(Associativity assoc, Symbol[] symbols) 326 | in { 327 | foreach (symbol; symbols) { 328 | assert(symbol.type != SymbolType.non_terminal); 329 | } 330 | } 331 | body { 332 | foreach (symbol; symbols) { 333 | symbol.associative_precedence = AssociativePrecedence(assoc, current_precedence); 334 | } 335 | current_precedence--; 336 | } 337 | 338 | void new_field(string field_name, string field_type, string conv_func_name, CharLocation defined_at) 339 | in { 340 | assert(!is_known_field(field_name)); 341 | } 342 | body { 343 | field_definitions[field_name] = FieldDefinition(field_name, field_type, conv_func_name); 344 | } 345 | 346 | bool is_known_field(string field_name) 347 | { 348 | return (field_name in field_definitions) !is null; 349 | } 350 | 351 | void add_skip_rule(string new_rule) 352 | in { 353 | assert(new_rule.length > 3); 354 | } 355 | body { 356 | 
skip_rule_list ~= new_rule;
357 | }
358 | 
359 | NonTerminalSymbol define_non_terminal(string symbol_name, CharLocation location)
360 | in {
361 | assert(!is_known_token(symbol_name) && !is_known_tag(symbol_name));
362 | assert(!is_known_non_terminal(symbol_name) || !non_terminals[symbol_name].is_defined());
363 | }
364 | body {
365 | auto symbol = non_terminals.get(symbol_name, null);
366 | if (symbol !is null) {
367 | symbol.defined_at = location;
368 | } else {
369 | symbol = cast(NonTerminalSymbol) new Symbol(next_symbol_id++, symbol_name, SymbolType.non_terminal, location, true);
370 | non_terminals[symbol_name] = symbol;
371 | }
372 | return symbol;
373 | }
374 | 
375 | NonTerminalSymbol[] get_undefined_symbols()
376 | {
377 | NonTerminalSymbol[] undefined_symbols;
378 | foreach (nts; non_terminals) {
379 | if (!nts.is_defined) {
380 | undefined_symbols ~= nts;
381 | }
382 | }
383 | return undefined_symbols;
384 | }
385 | 
386 | Symbol[] get_unused_symbols()
387 | {
388 | Symbol[] unused_symbols;
389 | foreach (symbols; [tokens, tags, non_terminals]) {
390 | foreach (symbol; symbols) {
391 | if (!symbol.is_used) {
392 | unused_symbols ~= symbol;
393 | }
394 | }
395 | }
396 | return unused_symbols;
397 | }
398 | 
399 | TokenSymbol[] get_tokens_ordered()
400 | out (result) {
401 | mixin WorkAroundClassCmpLimitations!TokenSymbol;
402 | for (auto i = 0; i < result.length; i++) {
403 | if (i > 0) assert(WA_CMP_ECAST(result[i - 1]) < WA_CMP_ECAST(result[i]));
404 | assert(result[i].type == SymbolType.token);
405 | }
406 | assert(result.length == tokens.length);
407 | }
408 | body {
409 | return tokens.values.wa_sort;
410 | }
411 | 
412 | TokenSymbol[] get_special_tokens_ordered()
413 | out (result) {
414 | mixin WorkAroundClassCmpLimitations!TokenSymbol;
415 | for (auto i = 0; i < result.length; i++) {
416 | if (i > 0) assert(WA_CMP_ECAST(result[i - 1]) < WA_CMP_ECAST(result[i]));
417 | assert(result[i].type == SymbolType.token);
418 | assert(result[i].id >= SpecialSymbols.min);
419 | assert(result[i].id <= SpecialSymbols.max);
420 | }
421 | }
422 | body {
423 | // special_symbols is ordered so just pick the tokens
424 | TokenSymbol[] special_tokens_ordered;
425 | foreach (symbol; special_symbols) {
426 | if (symbol.type == SymbolType.token) {
427 | special_tokens_ordered ~= symbol;
428 | }
429 | }
430 | return special_tokens_ordered;
431 | }
432 | 
433 | NonTerminalSymbol[] get_non_terminals_ordered()
434 | out (result) {
435 | mixin WorkAroundClassCmpLimitations!NonTerminalSymbol;
436 | for (auto i = 0; i < result.length; i++) {
437 | if (i > 0) assert(WA_CMP_ECAST(result[i - 1]) < WA_CMP_ECAST(result[i]));
438 | assert(result[i].type == SymbolType.non_terminal);
439 | }
440 | assert(result.length == non_terminals.length);
441 | }
442 | body {
443 | return non_terminals.values.wa_sort;
444 | }
445 | 
446 | NonTerminalSymbol[] get_special_non_terminals_ordered()
447 | out (result) {
448 | mixin WorkAroundClassCmpLimitations!NonTerminalSymbol;
449 | for (auto i = 0; i < result.length; i++) {
450 | if (i > 0) assert(WA_CMP_ECAST(result[i - 1]) < WA_CMP_ECAST(result[i]));
451 | assert(result[i].type == SymbolType.non_terminal);
452 | assert(result[i].id >= SpecialSymbols.min);
453 | assert(result[i].id <= SpecialSymbols.max);
454 | }
455 | }
456 | body {
457 | NonTerminalSymbol[] special_non_terminals;
458 | foreach (symbol; special_symbols) {
459 | if (symbol.type == SymbolType.non_terminal) {
460 | special_non_terminals ~= symbol;
461 | }
462 | }
463 | return special_non_terminals;
464 | }
465 | 
466 | 
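// where was the named field declared? (reported from the FieldDefinition recorded by new_field())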
CharLocation get_field_defined_at(string field_name)
467 | {
468 | return field_definitions[field_name].defined_at;
469 | }
470 | 
471 | FieldDefinition[] get_field_definitions()
472 | {
473 | return field_definitions.values;
474 | }
475 | 
476 | string[] get_skip_rules()
477 | {
478 | return skip_rule_list.dup;
479 | }
480 | 
481 | string[] get_description()
482 | {
483 | auto text_lines = ["Fields:"];
484 | if (field_definitions.length == 0) {
485 | text_lines ~= " ";
486 | } else {
487 | foreach (key; field_definitions.keys.wa_sort) {
488 | with (field_definitions[key]) {
489 | if (conversion_function_name.length == 0) {
490 | text_lines ~= format(" %s: %s: %s to!(%s)(string str)", field_name, field_type, field_type, field_type);
491 | } else {
492 | text_lines ~= format(" %s: %s: %s %s(string str)", field_name, field_type, field_type, conversion_function_name);
493 | }
494 | }
495 | }
496 | }
497 | text_lines ~= "Tokens:";
498 | if (tokens.length == 0) {
499 | text_lines ~= " ";
500 | } else {
501 | foreach (token; get_tokens_ordered()) {
502 | with (token) {
503 | text_lines ~= format(" %s: %s: %s: %s: %s: %s", id, name, pattern, field_name, associativity, precedence);
504 | text_lines ~= format(" Defined At: %s", defined_at);
505 | text_lines ~= format(" Used At: %s", used_at);
506 | }
507 | }
508 | }
509 | text_lines ~= "Precedence Tags:";
510 | if (tags.length == 0) {
511 | text_lines ~= " ";
512 | } else {
513 | foreach (tag_key; tags.keys.wa_sort) {
514 | with (tags[tag_key]) {
515 | text_lines ~= format(" %s: %s: %s: %s", id, name, associativity, precedence);
516 | text_lines ~= format(" Defined At: %s", defined_at);
517 | text_lines ~= format(" Used At: %s", used_at);
518 | }
519 | }
520 | }
521 | text_lines ~= "Non Terminal Symbols:";
522 | if (non_terminals.length == 0) {
523 | text_lines ~= " ";
524 | } else {
525 | foreach (non_terminal; get_non_terminals_ordered()) {
526 | with (non_terminal) {
527 | text_lines ~= format(" %s: %s:", id, name);
528 | text_lines ~= format(" Defined At: %s", defined_at);
529 | text_lines ~= format(" Used At: %s", used_at);
530 | }
531 | }
532 | }
533 | return text_lines;
534 | }
535 | }
536 | 
537 | unittest {
538 | auto st = SymbolTable();
539 | assert(st.get_special_symbol(SpecialSymbols.start).name == "ddSTART");
540 | }
541 | 
--------------------------------------------------------------------------------
/workarounds.d:
--------------------------------------------------------------------------------
1 | // workarounds.d
2 | //
3 | // Copyright Peter Williams 2013 .
4 | //
5 | // Distributed under the Boost Software License, Version 1.0.
6 | // (See accompanying file LICENSE_1_0.txt or copy at
7 | // http://www.boost.org/LICENSE_1_0.txt)
8 | 
9 | import std.algorithm: find, sort;
10 | 
11 | mixin template WorkAroundClassCmpLimitations(T) {
12 | static if (is(T == class)) {
13 | // WORKAROUND: comparison problems with const classes
14 | auto WA_CMP_ECAST = (const T x) => cast(T)x;
15 | } else {
16 | auto WA_CMP_ECAST = (const T x) => x;
17 | }
18 | }
19 | 
20 | // Does this list contain the item? For use in contracts.
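// NB: find() compares with ==, so for class elements this is whatever opEquals() provides.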
21 | private bool contains(T)(in T[] list, in T item)
22 | {
23 | auto tail = find(list, item);
24 | return tail.length && tail[0] == item;
25 | }
26 | unittest {
27 | assert(contains([1, 3, 9, 12], 3));
28 | assert(!contains([1, 3, 9, 12], 5));
29 | }
30 | 
31 | private bool is_ordered(T)(in T[] list)
32 | {
33 | for (auto j = 1; j < list.length; j++) {
34 | static if (is(T == class)) { // WORKAROUND: class opCmp() design flaw
35 | if (cast(T) list[j - 1] > cast(T) list[j]) return false;
36 | } else {
37 | if (list[j - 1] > list[j]) return false;
38 | }
39 | }
40 | return true;
41 | }
42 | unittest {
43 | assert(is_ordered(new int[0]));
44 | assert(is_ordered([1]));
45 | assert(is_ordered([1, 1]));
46 | assert(is_ordered([1, 2, 3, 4, 5, 6, 7]));
47 | assert(!is_ordered([1, 2, 4, 3, 5, 6, 7]));
48 | assert(is_ordered([1, 2, 3, 5, 5, 6, 7]));
49 | }
50 | 
51 | T[] wa_sort(T)(T[] list)
52 | out (result) {
53 | assert(list.length == result.length);
54 | assert(result.is_ordered);
55 | foreach (item; list) assert(result.contains(item));
56 | foreach (item; result) assert(list.contains(item));
57 | }
58 | body {
59 | // sort() works in place, so operate on a duplicate of the list
60 | // to avoid mutating the caller's array as a side effect
61 | auto olist = list.dup;
62 | sort(olist);
63 | return olist;
64 | }
65 | 
66 | --------------------------------------------------------------------------------