├── .github
│   └── workflows
│       └── ci.yml
├── Dscanner
│   ├── libdparse
│   │   └── src
│   │       ├── dparse
│   │       │   ├── ast.d
│   │       │   ├── entities.d
│   │       │   ├── formatter.d
│   │       │   ├── lexer.d
│   │       │   └── parser.d
│   │       └── std
│   │           └── experimental
│   │               └── lexer.d
│   └── src
│       └── astprinter.d
├── Makefile
├── README.md
├── adrdox_docs
│   ├── home.d
│   ├── package.d
│   ├── syntax.d
│   └── tips.d
├── archive.d
├── cgi.d
├── color.d
├── comment.d
├── database.d
├── db.sql
├── doc2.d
├── dom.d
├── dub.json
├── jstex.d
├── jsvar.d
├── katex
│   ├── KaTeX_AMS-Regular.ttf
│   ├── KaTeX_AMS-Regular.woff
│   ├── KaTeX_AMS-Regular.woff2
│   ├── KaTeX_Caligraphic-Bold.ttf
│   ├── KaTeX_Caligraphic-Bold.woff
│   ├── KaTeX_Caligraphic-Bold.woff2
│   ├── KaTeX_Caligraphic-Regular.ttf
│   ├── KaTeX_Caligraphic-Regular.woff
│   ├── KaTeX_Caligraphic-Regular.woff2
│   ├── KaTeX_Fraktur-Bold.ttf
│   ├── KaTeX_Fraktur-Bold.woff
│   ├── KaTeX_Fraktur-Bold.woff2
│   ├── KaTeX_Fraktur-Regular.ttf
│   ├── KaTeX_Fraktur-Regular.woff
│   ├── KaTeX_Fraktur-Regular.woff2
│   ├── KaTeX_Main-Bold.ttf
│   ├── KaTeX_Main-Bold.woff
│   ├── KaTeX_Main-Bold.woff2
│   ├── KaTeX_Main-BoldItalic.ttf
│   ├── KaTeX_Main-BoldItalic.woff
│   ├── KaTeX_Main-BoldItalic.woff2
│   ├── KaTeX_Main-Italic.ttf
│   ├── KaTeX_Main-Italic.woff
│   ├── KaTeX_Main-Italic.woff2
│   ├── KaTeX_Main-Regular.ttf
│   ├── KaTeX_Main-Regular.woff
│   ├── KaTeX_Main-Regular.woff2
│   ├── KaTeX_Math-BoldItalic.ttf
│   ├── KaTeX_Math-BoldItalic.woff
│   ├── KaTeX_Math-BoldItalic.woff2
│   ├── KaTeX_Math-Italic.ttf
│   ├── KaTeX_Math-Italic.woff
│   ├── KaTeX_Math-Italic.woff2
│   ├── KaTeX_SansSerif-Bold.ttf
│   ├── KaTeX_SansSerif-Bold.woff
│   ├── KaTeX_SansSerif-Bold.woff2
│   ├── KaTeX_SansSerif-Italic.ttf
│   ├── KaTeX_SansSerif-Italic.woff
│   ├── KaTeX_SansSerif-Italic.woff2
│   ├── KaTeX_SansSerif-Regular.ttf
│   ├── KaTeX_SansSerif-Regular.woff
│   ├── KaTeX_SansSerif-Regular.woff2
│   ├── KaTeX_Script-Regular.ttf
│   ├── KaTeX_Script-Regular.woff
│   ├── KaTeX_Script-Regular.woff2
│   ├── KaTeX_Size1-Regular.ttf
│   ├── KaTeX_Size1-Regular.woff
│   ├── KaTeX_Size1-Regular.woff2
│   ├── KaTeX_Size2-Regular.ttf
│   ├── KaTeX_Size2-Regular.woff
│   ├── KaTeX_Size2-Regular.woff2
│   ├── KaTeX_Size3-Regular.ttf
│   ├── KaTeX_Size3-Regular.woff
│   ├── KaTeX_Size3-Regular.woff2
│   ├── KaTeX_Size4-Regular.ttf
│   ├── KaTeX_Size4-Regular.woff
│   ├── KaTeX_Size4-Regular.woff2
│   ├── KaTeX_Typewriter-Regular.ttf
│   ├── KaTeX_Typewriter-Regular.woff
│   ├── KaTeX_Typewriter-Regular.woff2
│   ├── katex.min.css
│   └── katex.min.js
├── latex.d
├── locate.d
├── postgres.d
├── script.d
├── script.js
├── search-docs.html
├── search-docs.js
├── skeleton-default.html
├── stemmer.d
├── style.css
└── syntaxhighlighter.d

/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
name: CI
on:
  push:
    branches:
      - master
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+'
  pull_request:
jobs:
  build:
    strategy:
      matrix:
        os:
          - ubuntu-latest
          - macos-latest
          - windows-latest
        dc:
          - dmd-latest
          - ldc-latest
          - dmd-beta
          - ldc-beta
        profile:
          - debug
    runs-on: ${{ matrix.os }}
    steps:
      - name: Setup D compiler
        uses: dlang-community/setup-dlang@v2
        with:
          compiler: ${{ matrix.dc }}
      - name: Checkout source
        uses: actions/checkout@v4
      - name: Build binary
        run: dub build -b ${{ matrix.profile }}

  build-release:
    if: startsWith(github.ref, 'refs/tags/v')
    needs:
      - build
    strategy:
      fail-fast: true
      matrix:
        include:
          - name: linux-i686
            os: ubuntu-latest
            arch: x86
          - name: linux-x86_64
            os: ubuntu-latest
            arch: x86_64
          - name: macos-x86_64
            os: macos-latest
            arch: x86_64
          - name: windows-i686
            os: windows-latest
            arch: x86
          - name: windows-x86_64
            os: windows-latest
            arch: x86_64
    runs-on: ${{ matrix.os }}
    steps:
      - if: matrix.name == 'linux-i686'
        run: |
          sudo dpkg --add-architecture i386
          sudo apt-get update
          sudo apt-get install -y gcc-multilib
      - name: Setup D compiler
        uses: dlang-community/setup-dlang@v2
        with:
          compiler: ldc-latest
      - name: Checkout source
        uses: actions/checkout@v4
      - name: Build binary release
        run: dub build -a ${{ matrix.arch }} -b release
      - name: Create dist
        uses: papeloto/action-zip@v1
        with:
          files: build README.md
          dest: adrdox_${{ matrix.name }}.zip
      - name: Upload dist
        uses: actions/upload-artifact@v2
        with:
          name: dist
          path: adrdox_${{ matrix.name }}.zip

  publish-release:
    if: startsWith(github.ref, 'refs/tags/v')
    needs:
      - build-release
    runs-on: ubuntu-latest
    steps:
      - name: Download dists
        uses: actions/download-artifact@v2
        with:
          name: dist
      - name: Create release
        uses: softprops/action-gh-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          files: '*.zip'
          #prerelease: true
          draft: true
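
# The release jobs above run only for tags matching 'v[0-9]+.[0-9]+.[0-9]+',
# so a maintainer would cut a release roughly like this (a sketch; it assumes
# the canonical remote is named "origin"):
#
#   git tag v1.2.3
#   git push origin v1.2.3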

--------------------------------------------------------------------------------
/Dscanner/libdparse/src/std/experimental/lexer.d:
--------------------------------------------------------------------------------
// Written in the D programming language

/**
 * $(H2 Summary)
 * This module contains a range-based compile-time _lexer generator.
 *
 * $(H2 Overview)
 * The _lexer generator consists of a template mixin, $(LREF Lexer), along with
 * several helper templates for generating such things as token identifiers.
 *
 * To write a _lexer using this API:
 * $(OL
 *     $(LI Create the string array constants for your language.
 *         $(UL
 *             $(LI $(LINK2 #.staticTokens, staticTokens))
 *             $(LI $(LINK2 #.dynamicTokens, dynamicTokens))
 *             $(LI $(LINK2 #.possibleDefaultTokens, possibleDefaultTokens))
 *             $(LI $(LINK2 #.tokenHandlers, tokenHandlers))
 *         ))
 *     $(LI Create aliases for the various token and token identifier types
 *         specific to your language.
 *         $(UL
 *             $(LI $(LREF TokenIdType))
 *             $(LI $(LREF tokenStringRepresentation))
 *             $(LI $(LREF TokenStructure))
 *             $(LI $(LREF TokenId))
 *         ))
 *     $(LI Create a struct that mixes in the Lexer template mixin and
 *         implements the necessary functions.
 *         $(UL
 *             $(LI $(LREF Lexer))
 *         ))
 * )
 * Examples:
 * $(UL
 *     $(LI A _lexer for D is available $(LINK2 https://github.com/Hackerpilot/Dscanner/blob/master/std/d/lexer.d, here).)
 *     $(LI A _lexer for Lua is available $(LINK2 https://github.com/Hackerpilot/lexer-demo/blob/master/lualexer.d, here).)
 *     $(LI A _lexer for JSON is available $(LINK2 https://github.com/Hackerpilot/lexer-demo/blob/master/jsonlexer.d, here).)
 * )
 * $(DDOC_ANCHOR TemplateParameters) $(H2 Template Parameter Definitions)
 * $(DL
 * $(DT $(DDOC_ANCHOR defaultTokenFunction) $(B defaultTokenFunction))
 * $(DD A function that serves as the default token lexing function. For most
 *     languages this will be the identifier lexing function.)
 * $(DT $(DDOC_ANCHOR tokenSeparatingFunction) $(B tokenSeparatingFunction))
 * $(DD A function that is able to determine if an identifier/keyword has come
 *     to an end. This function must return bool and take a single size_t
 *     argument representing the number of bytes to skip over before looking
 *     for a separating character.)
 * $(DT $(DDOC_ANCHOR staticTokens) $(B staticTokens))
 * $(DD A listing of the tokens whose exact value never changes and which cannot
 *     possibly be a token handled by the default token lexing function. The
 *     most common example of this kind of token is an operator such as
 *     $(D_STRING "*"), or $(D_STRING "-") in a programming language.)
 * $(DT $(DDOC_ANCHOR dynamicTokens) $(B dynamicTokens))
 * $(DD A listing of tokens whose value is variable, such as whitespace,
 *     identifiers, number literals, and string literals.)
 * $(DT $(DDOC_ANCHOR possibleDefaultTokens) $(B possibleDefaultTokens))
 * $(DD A listing of tokens that could possibly be one of the tokens handled by
 *     the default token handling function. A common example of this is
 *     a keyword such as $(D_STRING "for"), which looks like the beginning of
 *     the identifier $(D_STRING "fortunate"). $(B tokenSeparatingFunction) is
 *     called to determine if the character after the $(D_STRING 'r') separates
 *     the identifier, indicating that the token is $(D_STRING "for"), or if
 *     lexing should be turned over to the $(B defaultTokenFunction).)
 * $(DT $(DDOC_ANCHOR tokenHandlers) $(B tokenHandlers))
 * $(DD A mapping of prefixes to custom token handling function names. The
 *     generated _lexer will search for the even-index elements of this array,
 *     and then call the function whose name is the element immediately after
 *     the even-indexed element. This is used for lexing complex tokens whose
 *     prefix is fixed.)
 * )
 *
 * Here are some example constants for a simple calculator _lexer:
 * ---
 * // There are a near infinite number of valid number literals, so numbers are
 * // dynamic tokens.
 * enum string[] dynamicTokens = ["numberLiteral", "whitespace"];
 *
 * // The operators are always the same, and cannot start a numberLiteral, so
 * // they are staticTokens
 * enum string[] staticTokens = ["-", "+", "*", "/"];
 *
 * // In this simple example there are no keywords or other tokens that could
 * // look like dynamic tokens, so this is blank.
 * enum string[] possibleDefaultTokens = [];
 *
 * // If any whitespace character or digit is encountered, pass lexing over to
 * // our custom handler functions. These will be demonstrated in an example
 * // later on.
 * enum string[] tokenHandlers = [
 *     "0", "lexNumber",
 *     "1", "lexNumber",
 *     "2", "lexNumber",
 *     "3", "lexNumber",
 *     "4", "lexNumber",
 *     "5", "lexNumber",
 *     "6", "lexNumber",
 *     "7", "lexNumber",
 *     "8", "lexNumber",
 *     "9", "lexNumber",
 *     " ", "lexWhitespace",
 *     "\n", "lexWhitespace",
 *     "\t", "lexWhitespace",
 *     "\r", "lexWhitespace"
 * ];
 * ---
 *
 * Copyright: Brian Schott 2013
 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Authors: Brian Schott, with ideas shamelessly stolen from Andrei Alexandrescu
 * Source: $(PHOBOSSRC std/experimental/_lexer.d)
 */

module std.experimental.lexer;

/**
 * Template for determining the type used for a token type.
 *
 * Selects the smallest unsigned integral type that is able to hold the value
 * staticTokens.length + dynamicTokens.length + possibleDefaultTokens.length.
 * For example if there are 20 static tokens, 30 dynamic tokens,
 * and 10 possible default tokens, this template will alias itself to ubyte,
 * as 20 + 30 + 10 < $(D_KEYWORD ubyte).max.
 * Examples:
 * ---
 * // In our calculator example this means that IdType is an alias for ubyte.
 * alias IdType = TokenIdType!(staticTokens, dynamicTokens, possibleDefaultTokens);
 * ---
 */
template TokenIdType(alias staticTokens, alias dynamicTokens,
    alias possibleDefaultTokens)
{
    immutable tokenCount = staticTokens.length + dynamicTokens.length
        + possibleDefaultTokens.length + 1;
    static if (tokenCount <= ubyte.max)
        alias TokenIdType = ubyte;
    else static if (tokenCount <= ushort.max)
        alias TokenIdType = ushort;
    else static if (tokenCount <= uint.max)
        alias TokenIdType = uint;
    else
        static assert (false, "The number of tokens must be less than uint.max");
}

/**
 * Looks up the string representation of the given token type.
 *
 * This is the inverse of the TokenId template.
 * Params: type = the token type identifier
 * Examples:
 * ---
 * alias str = tokenStringRepresentation!(IdType, staticTokens, dynamicTokens, possibleDefaultTokens);
 * assert (str(tok!"*") == "*");
 * ---
 * See_also: $(LREF TokenId)
 */
string tokenStringRepresentation(IdType, alias staticTokens, alias dynamicTokens,
    alias possibleDefaultTokens)(IdType type) @property
{
    enum tokens = staticTokens ~ dynamicTokens ~ possibleDefaultTokens;

    if (type == 0)
        return "!ERROR!";
    else if (type < tokens.length + 1)
        return tokens[type - 1];
    else
        return null;
}

unittest
{
    alias IdType = TokenIdType!(["foo"], ["bar"], ["doo"]);
    enum tok(string token) = TokenId!(IdType, ["foo"], ["bar"], ["doo"], token);
    alias str = tokenStringRepresentation!(IdType, ["foo"], ["bar"], ["doo"]);

    static assert (str(tok!"foo") == "foo");
    static assert (str(tok!"bar") == "bar");
    static assert (str(tok!"doo") == "doo");
}

/**
 * Generates the token type identifier for the given symbol.
 *
 * There are two special cases:
 * $(UL
 *     $(LI If symbol is $(D_STRING ""), then the token identifier will be 0)
 *     $(LI If symbol is $(D_STRING "\0"), then the token identifier will be the
 *         maximum valid token type identifier)
 * )
 * In all cases this template will alias itself to a constant of type IdType.
 * This template will fail at compile time if $(D_PARAM symbol) is not one of
 * the staticTokens, dynamicTokens, or possibleDefaultTokens.
 * Examples:
 * ---
 * template tok(string symbol)
 * {
 *     alias tok = TokenId!(IdType, staticTokens, dynamicTokens,
 *         possibleDefaultTokens, symbol);
 * }
 * // num and plus are of type ubyte.
 * IdType plus = tok!"+";
 * IdType num = tok!"numberLiteral";
 * ---
 */
template TokenId(IdType, alias staticTokens, alias dynamicTokens,
    alias possibleDefaultTokens, string symbol)
{
    enum tokens = staticTokens ~ dynamicTokens ~ possibleDefaultTokens;

    import std.algorithm;
    static if (symbol == "")
    {
        enum id = 0;
        alias TokenId = id;
    }
    else static if (symbol == "\0")
    {
        enum id = 1 + tokens.length;
        alias TokenId = id;
    }
    else
    {
        enum i = tokens.countUntil(symbol);
        static if (i != -1)
        {
            enum id = i + 1;
            static assert (id >= 0 && id < IdType.max, "Invalid token: " ~ symbol);
            alias TokenId = id;
        }
        else
            static assert (0, "Invalid token: " ~ symbol);
    }
}

/**
 * The token that is returned by the lexer.
 * Params:
 *     IdType = The D type of the "type" token type field.
 *     extraFields = A string containing D code for any extra fields that should
 *         be included in the token structure body. This string is passed
 *         directly to a mixin statement.
 * Examples:
 * ---
 * // No extra struct fields are desired in this example, so leave it blank.
 * alias Token = TokenStructure!(IdType, "");
 * Token minusToken = Token(tok!"-");
 * ---
 */
struct TokenStructure(IdType, string extraFields = "")
{
public pure nothrow @safe @nogc:

    bool opEquals(ref const typeof(this) other) const
    {
        return this.type == other.type && this.text == other.text;
    }

    /**
     * Returns: true if the token has the given type, false otherwise.
     */
    bool opEquals(IdType type) const
    {
        return this.type == type;
    }

    /**
     * Constructs a token from a token type.
     * Params: type = the token type
     */
    this(IdType type)
    {
        this.type = type;
    }

    /**
     * Constructs a token.
     * Params:
     *     type = the token type
     *     text = the text of the token, which may be null
     *     line = the line number at which this token occurs
     *     column = the column number at which this token occurs
     *     index = the byte offset from the beginning of the input at which this
     *         token occurs
     */
    this(IdType type, string text, size_t line, size_t column, size_t index)
    {
        this.text = text;
        this.line = line;
        this.column = column;
        this.type = type;
        this.index = index;
    }

    /**
     * The _text of the token.
     */
    string text;

    /**
     * The _line number at which this token occurs.
     */
    size_t line;

    /**
     * The _column number at which this token occurs. This is measured in bytes
     * and may not be correct when tab characters are involved.
     */
    size_t column;

    /**
     * The byte offset from the beginning of the input at which this token
     * occurs.
     */
    size_t index;

    /**
     * The token type.
     */
    IdType type;

    mixin (extraFields);
}
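
// The extraFields parameter above accepts arbitrary declarations for the token
// body. A sketch of how a lexer might attach trivia to each token (the
// `comment` field below is hypothetical, not something this module defines):
//
// ---
// alias Token = TokenStructure!(IdType, q{string comment;});
//
// Token t = Token(tok!"numberLiteral", "42", 1, 1, 0);
// t.comment = "attached documentation comment";
// ---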

/**
 * The implementation of the _lexer is contained within this mixin template.
 *
 * To use it, this template should be mixed in to a struct that represents the
 * _lexer for your language. This struct should implement the following methods:
 * $(UL
 *     $(LI popFront, which should call this mixin's _popFront() and
 *         additionally perform any token filtering or shuffling you deem
 *         necessary. For example, you can implement popFront to skip comment
 *         or whitespace tokens.)
 *     $(LI A function that serves as the default token lexing function. For
 *         most languages this will be the identifier lexing function. This
 *         should then be passed to the $(LREF Lexer) template mixin as the
 *         $(LINK2 #.defaultTokenFunction defaultTokenFunction) template
 *         parameter.)
 *     $(LI A function that is able to determine if an identifier/keyword has
 *         come to an end. This function must return $(D_KEYWORD bool) and take
 *         a single $(D_KEYWORD size_t) argument representing the number of
 *         bytes to skip over before looking for a separating character.)
 *     $(LI Any functions referred to in the tokenHandlers template parameter.
 *         These functions must be marked $(D_KEYWORD pure nothrow), take a
 *         single $(D_KEYWORD ref) Token argument, and write the lexed token
 *         into that argument.)
 *     $(LI A constructor that initializes the range field as well as calls
 *         popFront() exactly once (to initialize the _front field).)
 * )
 * Params:
 *     Token = $(LREF TokenStructure)
 *     defaultTokenFunction = $(LINK2 #.defaultTokenFunction, defaultTokenFunction)
 *     tokenSeparatingFunction = $(LINK2 #.tokenSeparatingFunction, tokenSeparatingFunction)
 *     staticTokens = $(LINK2 #.staticTokens, staticTokens)
 *     dynamicTokens = $(LINK2 #.dynamicTokens, dynamicTokens)
 *     possibleDefaultTokens = $(LINK2 #.possibleDefaultTokens, possibleDefaultTokens)
 *     tokenHandlers = $(LINK2 #.tokenHandlers, tokenHandlers)
 * Examples:
 * ---
 * struct CalculatorLexer
 * {
 *     mixin Lexer!(Token, defaultTokenFunction, isSeparating,
 *         staticTokens, dynamicTokens, possibleDefaultTokens, tokenHandlers);
 *
 *     this (ubyte[] bytes)
 *     {
 *         this.range = LexerRange(bytes);
 *         popFront();
 *     }
 *
 *     void popFront() pure
 *     {
 *         _popFront();
 *     }
 *
 *     void lexNumber(ref Token token) pure nothrow @safe
 *     {
 *         // implementation goes here
 *     }
 *
 *     void lexWhitespace(ref Token token) pure nothrow @safe
 *     {
 *         // implementation goes here
 *     }
 *
 *     void defaultTokenFunction(ref Token token) pure nothrow @safe
 *     {
 *         // There is no default token in the example calculator language, so
 *         // this is always an error.
 *         range.popFront();
 *         token = Token(tok!"");
 *     }
 *
 *     bool isSeparating(size_t offset) pure nothrow @safe
 *     {
 *         // For this example language, always return true.
 *         return true;
 *     }
 * }
 * ---
 */
mixin template Lexer(Token, alias defaultTokenFunction,
    alias tokenSeparatingFunction, alias staticTokens, alias dynamicTokens,
    alias possibleDefaultTokens, alias tokenHandlers)
{
    private alias _IDType = typeof(Token.type);
    private enum _tok(string symbol) = TokenId!(_IDType, staticTokens, dynamicTokens, possibleDefaultTokens, symbol);

    static assert (tokenHandlers.length % 2 == 0, "Each pseudo-token must"
        ~ " have a corresponding handler function name.");

    static string generateMask(const ubyte[] arr)
    {
        ulong u;
        for (size_t i = 0; i < arr.length && i < 8; i++)
        {
            u |= (cast(ulong) arr[i]) << (i * 8);
        }

        return toHex(u);
    }

    private static string generateByteMask(size_t l)
    {
        return toHex(ulong.max >> ((8 - l) * 8));
    }
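
    // A worked example of the two helpers above (illustrative; "+=" is a
    // hypothetical static token, not one defined in this module): token bytes
    // are packed little-endian, first byte lowest, so
    // generateMask(cast(const ubyte[]) "+=") yields "0x0000000000003D2B"
    // ('+' is 0x2B, '=' is 0x3D) and generateByteMask(2) yields
    // "0x000000000000FFFF". The generated dispatch code can then recognize
    // the token with a single masked comparison:
    //
    // ---
    // if ((frontBytes & 0x000000000000FFFF) == 0x0000000000003D2B)
    //     // the next two input bytes are '+' followed by '='
    // ---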

    private static size_t calcSplitCount(size_t a, size_t b) pure nothrow
    {
        int i;
        while (true)
        {
            i++;
            a /= 2;
            if (a < b)
                break;
        }
        return i;
    }

    private static char[] getBeginningChars(string[] allTokens)
    {
        char[] beginningChars;
        for (size_t i = 0; i < allTokens.length; i++)
        {
            if (allTokens[i].length == 0)
                continue;
            beginningChars ~= allTokens[i][0];
            size_t j = i + 1;
            while (j < allTokens.length && allTokens[i][0] == allTokens[j][0])
                j++;
            i = j - 1;
        }
        return beginningChars;
    }

    private static string generateStatements()
    {
        import std.algorithm : sort, uniq;
        import std.array : array;
        import std.range : stride;

        string[] pseudoTokens = array(tokenHandlers.stride(2));
        string[] allTokens = array(sort(staticTokens ~ possibleDefaultTokens ~ pseudoTokens).uniq());
        // Array consisting of a sorted list of the first characters of the
        // tokens.
        char[] beginningChars = getBeginningChars(allTokens);
        size_t i = calcSplitCount(beginningChars.length, 8);
        return generateStatementsStep(allTokens, pseudoTokens, beginningChars, i);
    }

    private static string generateStatementsStep(string[] allTokens,
        string[] pseudoTokens, char[] chars, size_t i, string indent = "")
    {
        string code;
        if (i > 0)
        {
            size_t p = chars.length / 2;
            code ~= indent ~ "if (f < "~toHex(chars[p])~") {\n";
            code ~= generateStatementsStep(allTokens, pseudoTokens, chars[0 .. p], i - 1, indent ~ "    ");
            code ~= indent ~ "}\n" ~ indent ~ "else\n" ~ indent ~ "{\n";
            code ~= generateStatementsStep(allTokens, pseudoTokens, chars[p .. $], i - 1, indent ~ "    ");
            code ~= indent ~ "}\n";
        }
        else
        {
            code ~= indent ~ "switch (f)\n" ~ indent ~ "{\n";
            foreach (char c; chars)
            {
                size_t begin;
                size_t end;
                for (size_t j = 0; j < allTokens.length; j++)
                {
                    if (allTokens[j].length == 0 || allTokens[j][0] != c)
                        continue;
                    begin = j;
                    end = j + 1;
                    while (end < allTokens.length && allTokens[begin][0] == allTokens[end][0])
                        end++;
                    break;
                }
                code ~= indent ~ "case "~toHex(c)~":\n";
                code ~= printCase(allTokens[begin .. end], pseudoTokens, indent ~ "    ");
            }
            code ~= indent ~ "default: goto _defaultTokenFunction;\n";
            code ~= indent ~ "}\n";
        }

        return code;
    }
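
    // For the calculator example in the module documentation, the code
    // returned by generateStatements() boils down to a first-byte dispatch
    // roughly like the following (an illustrative sketch, not captured
    // output):
    //
    // ---
    // switch (f)
    // {
    // case 0x2A: // '*': single-character static token
    //     range.index++; range.column++;
    //     token = Token(_tok!"*", null, line, column, index);
    //     return;
    // // ... analogous cases for '+', '-', and '/' ...
    // case 0x30: // '0': prefix of a pseudo-token, so call its handler
    //     lexNumber(token);
    //     return;
    // // ... analogous cases for '1'..'9' and the whitespace characters ...
    // default: goto _defaultTokenFunction;
    // }
    // ---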

    private static string printCase(string[] tokens, string[] pseudoTokens, string indent)
    {
        import std.array : array;
        import std.algorithm : countUntil, sort;
        import std.conv : text;
        string[] sortedTokens = array(sort!"a.length > b.length"(tokens));

        if (tokens.length == 1 && tokens[0].length == 1)
        {
            if (pseudoTokens.countUntil(tokens[0]) >= 0)
            {
                return indent ~ tokenHandlers[tokenHandlers.countUntil(tokens[0]) + 1]
                    ~ "(token);\n" ~ indent ~ "return;\n";
            }
            else if (staticTokens.countUntil(tokens[0]) >= 0)
            {
                return indent ~ "range.index++; range.column++;\n"
                    ~ indent ~ "token = Token(_tok!\"" ~ escape(tokens[0]) ~ "\", null, line, column, index);\n"
                    ~ indent ~ "return;";
            }
        }

        string code;

        bool insertTrailingGoto = true;
        foreach (i, token; sortedTokens)
        {
            immutable mask = generateMask(cast (const ubyte[]) token);
            if (token.length >= 8)
                code ~= indent ~ "if (frontBytes == " ~ mask ~ ")\n";
            else if (token.length != 1)
                code ~= indent ~ "if ((frontBytes & " ~ generateByteMask(token.length) ~ ") == " ~ mask ~ ")\n";
            if (token.length != 1)
                code ~= indent ~ "{\n";
            if (pseudoTokens.countUntil(token) >= 0)
            {
                if (token.length <= 8)
                {
                    code ~= indent ~ "    "
                        ~ tokenHandlers[tokenHandlers.countUntil(token) + 1]
                        ~ "(token);\n";
                    code ~= indent ~ "return;\n";
                }
                else
                {
                    code ~= indent ~ "    if (range.startsWith(cast (ubyte[]) \"" ~ escape(token) ~ "\"))\n";
                    code ~= indent ~ "        "
                        ~ tokenHandlers[tokenHandlers.countUntil(token) + 1]
                        ~ "(token);\n";
                    code ~= indent ~ "return;\n";
                }
            }
            else if (staticTokens.countUntil(token) >= 0)
            {
                if (token.length <= 8)
                {
                    insertTrailingGoto = false;
                    code ~= indent ~ (token.length != 1 ? "    " : "") ~ "range.index += " ~ text(token.length) ~ "; range.column += " ~ text(token.length) ~ ";\n";
                    code ~= indent ~ (token.length != 1 ? "    " : "") ~ "token = Token(_tok!\"" ~ escape(token) ~ "\", null, line, column, index);\n";
                    code ~= indent ~ (token.length != 1 ? "    " : "") ~ "return;\n";
                }
                else
                {
                    code ~= indent ~ "    pragma(msg, \"long static tokens not supported\"); // " ~ escape(token) ~ "\n";
                }
            }
            else
            {
                // possible default
                if (token.length <= 8)
                {
                    code ~= indent ~ "    if (tokenSeparatingFunction(" ~ text(token.length) ~ "))\n";
                    code ~= indent ~ "    {\n";
                    code ~= indent ~ "        range.index += " ~ text(token.length) ~ "; range.column += " ~ text(token.length) ~ ";\n";
                    code ~= indent ~ "        token = Token(_tok!\"" ~ escape(token) ~ "\", null, line, column, index);\n";
                    code ~= indent ~ "        return;\n";
                    code ~= indent ~ "    }\n";
                    code ~= indent ~ "    else\n";
                    code ~= indent ~ "        goto _defaultTokenFunction;\n";
                }
                else
                {
                    code ~= indent ~ "    if (range.startsWith(cast (ubyte[]) \"" ~ escape(token) ~"\") && tokenSeparatingFunction(" ~ text(token.length) ~ "))\n";
                    code ~= indent ~ "    {\n";
                    code ~= indent ~ "        range.index += " ~ text(token.length) ~ "; range.column += " ~ text(token.length) ~ ";\n";
                    code ~= indent ~ "        token = Token(_tok!\"" ~ escape(token) ~ "\", null, line, column, index);\n";
                    code ~= indent ~ "        return;\n";
                    code ~= indent ~ "    }\n";
                    code ~= indent ~ "    else\n";
                    code ~= indent ~ "        goto _defaultTokenFunction;\n";
                }
            }
            if (token.length != 1)
            {
                code ~= indent ~ "}\n";
            }
        }
        if (insertTrailingGoto)
            code ~= indent ~ "goto _defaultTokenFunction;\n";
        return code;
    }
" " : "") ~ "return;\n"; 578 | } 579 | else 580 | { 581 | code ~= indent ~ " pragma(msg, \"long static tokens not supported\"); // " ~ escape(token) ~ "\n"; 582 | } 583 | } 584 | else 585 | { 586 | // possible default 587 | if (token.length <= 8) 588 | { 589 | code ~= indent ~ " if (tokenSeparatingFunction(" ~ text(token.length) ~ "))\n"; 590 | code ~= indent ~ " {\n"; 591 | code ~= indent ~ " range.index += " ~ text(token.length) ~ "; range.column += " ~ text(token.length) ~ ";\n"; 592 | code ~= indent ~ " token = Token(_tok!\"" ~ escape(token) ~ "\", null, line, column, index);\n"; 593 | code ~= indent ~ " return;\n"; 594 | code ~= indent ~ " }\n"; 595 | code ~= indent ~ " else\n"; 596 | code ~= indent ~ " goto _defaultTokenFunction;\n"; 597 | } 598 | else 599 | { 600 | code ~= indent ~ " if (range.startsWith(cast (ubyte[]) \"" ~ escape(token) ~"\") && isSeparating(" ~ text(token.length) ~ "))\n"; 601 | code ~= indent ~ " {\n"; 602 | code ~= indent ~ " range.index += " ~ text(token.length) ~ "; range.column += " ~ text(token.length) ~ ";\n"; 603 | code ~= indent ~ " token = Token(_tok!\"" ~ escape(token) ~ "\", null, line, column, index);\n"; 604 | code ~= indent ~ " return;\n"; 605 | code ~= indent ~ " }\n"; 606 | code ~= indent ~ " else\n"; 607 | code ~= indent ~ " goto _defaultTokenFunction;\n"; 608 | } 609 | } 610 | if (token.length != 1) 611 | { 612 | code ~= indent ~ "}\n"; 613 | } 614 | } 615 | if (insertTrailingGoto) 616 | code ~= indent ~ "goto _defaultTokenFunction;\n"; 617 | return code; 618 | } 619 | 620 | /** 621 | * Implements the range primitive _front. 622 | */ 623 | ref const(Token) front()() pure nothrow const @property @safe 624 | { 625 | return _front; 626 | } 627 | 628 | /** 629 | * Advances the lexer to the next token and stores the new current token in 630 | * the _front variable. 631 | */ 632 | void _popFront()() pure nothrow @safe 633 | { 634 | advance(_front); 635 | } 636 | 637 | /** 638 | * Implements the range primitive _empty. 639 | */ 640 | bool empty()() pure const nothrow @property @safe @nogc 641 | { 642 | return _front.type == _tok!"\0"; 643 | } 644 | 645 | static string escape(string input) pure @trusted 646 | { 647 | string retVal; 648 | foreach (ubyte c; cast(ubyte[]) input) 649 | { 650 | switch (c) 651 | { 652 | case '\\': retVal ~= `\\`; break; 653 | case '"': retVal ~= `\"`; break; 654 | case '\'': retVal ~= `\'`; break; 655 | case '\t': retVal ~= `\t`; break; 656 | case '\n': retVal ~= `\n`; break; 657 | case '\r': retVal ~= `\r`; break; 658 | default: retVal ~= c; break; 659 | } 660 | } 661 | return retVal; 662 | } 663 | 664 | enum tokenSearch = generateStatements(); 665 | 666 | static ulong getFront(const ubyte[] arr) pure nothrow @trusted 667 | { 668 | static union ByteArr { ulong l; ubyte[8] arr; } 669 | static assert(ByteArr.sizeof == ulong.sizeof); 670 | ByteArr b; 671 | b.l = ulong.max; 672 | b.arr[0 .. arr.length] = arr[]; 673 | return b.l; 674 | } 675 | 676 | void advance(ref Token token) pure nothrow @trusted 677 | { 678 | if (range.index >= range.bytes.length) 679 | { 680 | token.type = _tok!"\0"; 681 | return; 682 | } 683 | immutable size_t index = range.index; 684 | immutable size_t column = range.column; 685 | immutable size_t line = range.line; 686 | immutable ulong frontBytes = range.index + 8 <= range.bytes.length 687 | ? getFront(range.bytes[range.index .. range.index + 8]) 688 | : getFront(range.bytes[range.index .. 

    /**
     * The lexer input.
     */
    LexerRange range;

    /**
     * The token that is currently at the front of the range.
     */
    Token _front;
}

/**
 * Range structure that wraps the _lexer's input.
 */
struct LexerRange
{
    // TODO: When D gets @forceinline the template inline hack (i.e.
    // `void front()() { ... }`) should be removed.

public nothrow pure @safe @nogc:
    /**
     * Params:
     *     bytes = the _lexer input
     *     index = the initial offset from the beginning of $(D_PARAM bytes)
     *     column = the initial _column number
     *     line = the initial _line number
     */
    this(const(ubyte)[] bytes, size_t index = 0, size_t column = 1, size_t line = 1)
    {
        this.bytes = bytes;
        this.index = index;
        this.column = column;
        this.line = line;
    }

    /**
     * Returns: a mark at the current position that can then be used with slice.
     */
    size_t mark()() const
    {
        return index;
    }

    /**
     * Sets the range to the given position.
     * Params: m = the position to seek to
     */
    void seek()(size_t m)
    {
        index = m;
    }

    /**
     * Returns: a slice of the input byte array between the given mark and the
     * current position.
     * Params: m = the beginning index of the slice to return
     */
    const(ubyte)[] slice()(size_t m) const
    {
        return bytes[m .. index];
    }

    /**
     * Implements the range primitive _empty.
     */
    bool empty()() const
    {
        return index >= bytes.length;
    }

    /**
     * Implements the range primitive _front.
     */
    ubyte front()() const
    {
        return bytes[index];
    }

    /**
     * Returns: the current item as well as the items $(D_PARAM p) items ahead.
     */
    const(ubyte)[] peek(size_t p) const
    {
        return index + p + 1 > bytes.length
            ? bytes[index .. $]
            : bytes[index .. index + p + 1];
    }

    /**
     * Returns: true if the range starts with the given byte sequence
     */
    bool startsWith(const(ubyte[]) needle) const
    {
        // Note: a needle that ends exactly at the end of the input still fits,
        // so only reject needles extending past the end.
        if (needle.length + index > bytes.length)
            return false;
        foreach (i; 0 .. needle.length)
            if (needle[i] != bytes[index + i])
                return false;
        return true;
    }

    /**
     * Returns: the byte that is $(D_PARAM offset) bytes ahead of the current
     * position.
     */
    ubyte peekAt()(size_t offset) const
    {
        return bytes[index + offset];
    }

    /**
     * Returns: true if it is possible to peek $(D_PARAM p) bytes ahead.
     */
    bool canPeek()(size_t p) const
    {
        return index + p < bytes.length;
    }

    /**
     * Implements the range primitive _popFront.
     */
    void popFront()()
    {
        index++;
        column++;
    }

    /**
     * Implements the algorithm _popFrontN more efficiently. This function does
     * not detect or handle newlines.
     */
    void popFrontN()(size_t n)
    {
        index += n;
        column += n;
    }

    /**
     * Increments the range's line number and resets the column counter.
     */
    void incrementLine()(size_t i = 1)
    {
        column = 1;
        line += i;
    }

    /**
     * The input _bytes.
     */
    const(ubyte)[] bytes;

    /**
     * The range's current position.
     */
    size_t index;

    /**
     * The current _column number.
     */
    size_t column;

    /**
     * The current _line number.
     */
    size_t line;
}

nothrow @safe
private void toHexInternal(char[] where, ubyte b) {
    if(b < 16)
        where[0] = '0';
    else {
        ubyte t = (b & 0xf0) >> 4;
        if(t >= 10)
            where[0] = cast(char) ('A' + t - 10);
        else
            where[0] = cast(char) ('0' + t);
        b &= 0x0f;
    }
    if(b >= 10)
        where[1] = cast(char) ('A' + b - 10);
    else
        where[1] = cast(char) ('0' + b);
}

private void toHexInternal(char[] where, ulong b) {
    foreach(i; 0 .. 8)
        toHexInternal(where[i * 2 .. i * 2 + 2], cast(ubyte) (b >> ((7-i) * 8)));
}

string toHex(ulong w) {
    char[18] buffer;
    buffer[0] = '0';
    buffer[1] = 'x';
    toHexInternal(buffer[2 .. $], w);
    return buffer.idup;
}
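
// A minimal end-to-end sketch (not part of the original source) showing how
// the pieces above fit together for the calculator language described in the
// module documentation. It assumes the enum constants (staticTokens,
// dynamicTokens, possibleDefaultTokens, tokenHandlers) from that
// documentation are in scope; the bodies of lexNumber and lexWhitespace are
// illustrative assumptions:
//
// ---
// alias IdType = TokenIdType!(staticTokens, dynamicTokens, possibleDefaultTokens);
// alias Token = TokenStructure!(IdType, "");
// enum tok(string symbol) = TokenId!(IdType, staticTokens, dynamicTokens,
//     possibleDefaultTokens, symbol);
//
// struct CalculatorLexer
// {
//     mixin Lexer!(Token, defaultTokenFunction, isSeparating,
//         staticTokens, dynamicTokens, possibleDefaultTokens, tokenHandlers);
//
//     this(ubyte[] bytes) { this.range = LexerRange(bytes); popFront(); }
//
//     void popFront() pure { _popFront(); }
//
//     void lexNumber(ref Token token) pure nothrow
//     {
//         // Capture the start position before consuming digits.
//         immutable mark = range.mark();
//         immutable line = range.line;
//         immutable column = range.column;
//         while (!range.empty && range.front >= '0' && range.front <= '9')
//             range.popFront();
//         token = Token(tok!"numberLiteral", cast(string) range.slice(mark),
//             line, column, mark);
//     }
//
//     void lexWhitespace(ref Token token) pure nothrow
//     {
//         immutable mark = range.mark();
//         immutable line = range.line;
//         immutable column = range.column;
//         while (!range.empty && (range.front == ' ' || range.front == '\t'
//                 || range.front == '\r' || range.front == '\n'))
//         {
//             immutable c = range.front;
//             range.popFront();
//             if (c == '\n')
//                 range.incrementLine();
//         }
//         token = Token(tok!"whitespace", cast(string) range.slice(mark),
//             line, column, mark);
//     }
//
//     void defaultTokenFunction(ref Token token) pure nothrow
//     {
//         // No default token exists in this language, so this is an error.
//         range.popFront();
//         token = Token(tok!"");
//     }
//
//     bool isSeparating(size_t offset) pure nothrow { return true; }
// }
//
// // Usage: iterate the lexer like any other input range.
// auto lexer = CalculatorLexer(cast(ubyte[]) "1 + 23 * 4".dup);
// foreach (t; lexer) { /* consume tokens */ }
// ---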

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
LIBDPARSE=Dscanner/libdparse/src/dparse/ast.d Dscanner/libdparse/src/dparse/formatter.d Dscanner/libdparse/src/dparse/parser.d Dscanner/libdparse/src/dparse/entities.d Dscanner/libdparse/src/dparse/lexer.d Dscanner/libdparse/src/std/experimental/lexer.d Dscanner/src/astprinter.d
DMD?=dmd

all:
	#$(DMD) diff.d terminal.d $(LIBDPARSE)
	$(DMD) -debug -m64 doc2.d latex.d jstex.d syntaxhighlighter.d comment.d stemmer.d dom.d script.d jsvar.d color.d archive.d -J. $(LIBDPARSE) -g # -version=std_parser_verbose
	# it may pull in script.d and jsvar.d later fyi
	#
	#$(DMD) -of/var/www/dpldocs.info/locate locate.d dom.d stemmer.d cgi -J. -version=fastcgi -m64 -debug
pq:
	$(DMD) -m64 doc2.d latex.d jstex.d syntaxhighlighter.d comment.d stemmer.d dom.d script.d jsvar.d color.d archive.d -version=with_postgres database.d postgres.d -L-L/usr/local/pgsql/lib -L-lpq -J. $(LIBDPARSE) -g # -version=std_parser_verbose
locate:
	$(DMD) -oflocate locate.d dom.d stemmer.d cgi -J. -version=scgi -m64 -debug postgres.d archive.d database.d -L-L/usr/local/pgsql/lib -g

vps_locate:
	ldc2i -oflocate_vps -oq -O3 -m64 locate.d stemmer.d -J. -d-version=scgi -d-version=vps -g -L-L/usr/local/pgsql/lib -L-lpq
ldc:
	echo "use make pq instead ldc is broken"
	echo ldc2 -oq -O3 --d-debug -m64 doc2.d latex.d jstex.d syntaxhighlighter.d comment.d archive.d stemmer.d dom.d color.d -J. $(LIBDPARSE) --d-version=with_postgres database.d postgres.d -L-L/usr/local/pgsql/lib -L-lpq -g # -version=std_parser_verbose

http:
	$(DMD) -debug -ofserver -version=embedded_httpd -version=with_http_server -m64 doc2.d latex.d archive.d jstex.d cgi.d syntaxhighlighter.d comment.d stemmer.d dom.d script.d jsvar.d color.d -J. $(LIBDPARSE) -g # -version=std_parser_verbose
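
# The repository also ships a dub.json; CI builds with dub rather than this
# Makefile, so rough local equivalents are:
#
#   dub build -b debug      # what the CI build job runs
#   dub build -b release    # what CI packages for tagged releases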

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
This is my documentation generator, the results of which can be seen on http://dpldocs.info/

You should be able to just clone it and run `make`; everything it needs is included.

After you compile it, you can copy skeleton-default.html to skeleton.html and edit a few things inside it to your liking (or you can leave it and the generator will copy it automatically on first run). You may
want to replace the contents of the `