├── LICENSE ├── README.md ├── kdl ├── __init__.py ├── grammar.py ├── grammar.tatsu └── parser.py ├── requirements.txt ├── setup.py └── tests ├── complex.kdl ├── complex_formatted.kdl ├── test_complex.py └── test_solo.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Serafina Brocious 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # kdl-py 2 | 3 | A Python library for the [KDL Document Language](https://github.com/kdl-org/kdl). 4 | 5 | ## Install 6 | 7 | pip install kdl-py 8 | 9 | kdl-py is fully Python 2.7 and Python 3 friendly. 
10 | 11 | ## Usage 12 | 13 | ```py 14 | from kdl import parse, Document, Node 15 | print(parse('''// Nodes can be separated into multiple lines 16 | title \ 17 | "Some title" 18 | 19 | 20 | // Files must be utf8 encoded! 21 | smile "😁" 22 | 23 | // Instead of anonymous nodes, nodes and properties can be wrapped 24 | // in "" for arbitrary node names. 25 | "!@#$@$%Q#$%~@!40" "1.2.3" "!!!!!"=true 26 | 27 | // The following is a legal bare identifier: 28 | foo123~!@#$%^&*.:'|/?+ "weeee" 29 | 30 | // And you can also use unicode! 31 | ノード お名前="☜(゚ヮ゚☜)" 32 | 33 | // kdl specifically allows properties and values to be 34 | // interspersed with each other, much like CLI commands. 35 | foo bar=true "baz" quux=false 1 2 3 36 | ''')) 37 | 38 | # Creating documents from scratch is currently very gross 39 | print() 40 | doc = Document() 41 | doc.append(Node(name='simple-name', properties=None, arguments=[123], children=[Node(name='complex name here!', properties=None, arguments=None, children=None)])) 42 | print(doc) 43 | ``` 44 | 45 | ``` 46 | title "Some title" 47 | smile "😁" 48 | !@#$@$%Q#$%~@!40 !!!!!=true "1.2.3" 49 | foo123~!@#$%^&*.:'|/?+ "weeee" 50 | ノード お名前="☜(゚ヮ゚☜)" 51 | foo bar=true quux=false "baz" 1 2 3 52 | 53 | simple-name 123 { 54 | "complex name here!" 55 | } 56 | ``` 57 | 58 | ## License 59 | 60 | The code is available under the [MIT license](LICENSE). The example above is 61 | made available from https://github.com/kdl-org/kdl under 62 | [Creative Commons Attribution-ShareAlike 4.0 International](https://github.com/kdl-org/kdl/blob/main/LICENSE.md). 
-------------------------------------------------------------------------------- /kdl/__init__.py: -------------------------------------------------------------------------------- 1 | from .parser import parse, Document, Node, Symbol 2 | 3 | __all__ = 'parse', 'Document', 'Node', 'Symbol' 4 | -------------------------------------------------------------------------------- /kdl/grammar.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # CAVEAT UTILITOR 5 | # 6 | # This file was automatically generated by TatSu. 7 | # 8 | # https://pypi.python.org/pypi/tatsu/ 9 | # 10 | # Any changes you make to it will be overwritten the next time 11 | # the file is generated. 12 | 13 | 14 | from __future__ import print_function, division, absolute_import, unicode_literals 15 | 16 | import sys 17 | 18 | from tatsu.buffering import Buffer 19 | from tatsu.parsing import Parser 20 | from tatsu.parsing import tatsumasu, leftrec, nomemo 21 | from tatsu.parsing import leftrec, nomemo # noqa 22 | from tatsu.util import re, generic_main # noqa 23 | 24 | 25 | KEYWORDS = {} # type: ignore 26 | 27 | 28 | class KdlBuffer(Buffer): 29 | def __init__( 30 | self, 31 | text, 32 | whitespace=None, 33 | nameguard=None, 34 | comments_re=None, 35 | eol_comments_re=None, 36 | ignorecase=None, 37 | namechars='', 38 | **kwargs 39 | ): 40 | super(KdlBuffer, self).__init__( 41 | text, 42 | whitespace=whitespace, 43 | nameguard=nameguard, 44 | comments_re=comments_re, 45 | eol_comments_re=eol_comments_re, 46 | ignorecase=ignorecase, 47 | namechars=namechars, 48 | **kwargs 49 | ) 50 | 51 | 52 | class KdlParser(Parser): 53 | def __init__( 54 | self, 55 | whitespace=None, 56 | nameguard=None, 57 | comments_re=None, 58 | eol_comments_re=None, 59 | ignorecase=None, 60 | left_recursion=True, 61 | parseinfo=True, 62 | keywords=None, 63 | namechars='', 64 | buffer_class=KdlBuffer, 65 | **kwargs 66 | ): 67 | if keywords is 
None: 68 | keywords = KEYWORDS 69 | super(KdlParser, self).__init__( 70 | whitespace=whitespace, 71 | nameguard=nameguard, 72 | comments_re=comments_re, 73 | eol_comments_re=eol_comments_re, 74 | ignorecase=ignorecase, 75 | left_recursion=left_recursion, 76 | parseinfo=parseinfo, 77 | keywords=keywords, 78 | namechars=namechars, 79 | buffer_class=buffer_class, 80 | **kwargs 81 | ) 82 | 83 | @tatsumasu() 84 | def _start_(self): # noqa 85 | 86 | def block0(): 87 | self._ws_() 88 | self._closure(block0) 89 | self._nodes_() 90 | self.name_last_node('@') 91 | 92 | def block2(): 93 | self._ws_() 94 | self._closure(block2) 95 | self._check_eof() 96 | 97 | @tatsumasu() 98 | def _nodes_(self): # noqa 99 | 100 | def block0(): 101 | self._linespace_() 102 | self._closure(block0) 103 | 104 | def block1(): 105 | self._node_() 106 | self.add_last_node_to_name('@') 107 | 108 | def block3(): 109 | self._linespace_() 110 | self._closure(block3) 111 | self._closure(block1) 112 | 113 | @tatsumasu() 114 | def _node_(self): # noqa 115 | with self._optional(): 116 | self._token('/-') 117 | self.name_last_node('commented') 118 | 119 | def block1(): 120 | self._ws_() 121 | self._closure(block1) 122 | self._identifier_() 123 | self.name_last_node('name') 124 | 125 | def block3(): 126 | self._node_space_() 127 | self._node_props_and_args_() 128 | self.add_last_node_to_name('props_and_args') 129 | self._closure(block3) 130 | with self._optional(): 131 | 132 | def block5(): 133 | self._node_space_() 134 | self._closure(block5) 135 | self._node_children_() 136 | self.name_last_node('children') 137 | 138 | def block7(): 139 | self._ws_() 140 | self._closure(block7) 141 | self._node_terminator_() 142 | self.ast._define( 143 | ['children', 'commented', 'name'], 144 | ['props_and_args'] 145 | ) 146 | 147 | @tatsumasu() 148 | def _node_props_and_args_(self): # noqa 149 | with self._optional(): 150 | self._token('/-') 151 | self.name_last_node('commented') 152 | 153 | def block1(): 154 | self._ws_() 
155 | self._closure(block1) 156 | with self._group(): 157 | with self._choice(): 158 | with self._option(): 159 | self._prop_() 160 | self.name_last_node('prop') 161 | with self._option(): 162 | self._value_() 163 | self.name_last_node('value') 164 | self._error('no available options') 165 | self.ast._define( 166 | ['commented', 'prop', 'value'], 167 | [] 168 | ) 169 | 170 | @tatsumasu() 171 | def _node_children_(self): # noqa 172 | with self._optional(): 173 | self._token('/-') 174 | self.name_last_node('commented') 175 | 176 | def block1(): 177 | self._ws_() 178 | self._closure(block1) 179 | self._token('{') 180 | self._nodes_() 181 | self.name_last_node('children') 182 | self._token('}') 183 | self.ast._define( 184 | ['children', 'commented'], 185 | [] 186 | ) 187 | 188 | @tatsumasu() 189 | def _node_space_(self): # noqa 190 | with self._choice(): 191 | with self._option(): 192 | with self._group(): 193 | 194 | def block0(): 195 | self._ws_() 196 | self._closure(block0) 197 | self._escline_() 198 | 199 | def block1(): 200 | self._ws_() 201 | self._closure(block1) 202 | with self._option(): 203 | 204 | def block2(): 205 | self._ws_() 206 | self._positive_closure(block2) 207 | self._error('no available options') 208 | 209 | @tatsumasu() 210 | def _node_terminator_(self): # noqa 211 | with self._choice(): 212 | with self._option(): 213 | self._single_line_comment_() 214 | with self._option(): 215 | self._newline_() 216 | with self._option(): 217 | self._token(';') 218 | with self._option(): 219 | self._check_eof() 220 | self._error('no available options') 221 | 222 | @tatsumasu() 223 | def _identifier_(self): # noqa 224 | with self._choice(): 225 | with self._option(): 226 | self._string_() 227 | self.name_last_node('string') 228 | with self._option(): 229 | self._bare_identifier_() 230 | self.name_last_node('bare') 231 | self._error('no available options') 232 | self.ast._define( 233 | ['bare', 'string'], 234 | [] 235 | ) 236 | 237 | @tatsumasu() 238 | def 
_bare_identifier_(self): # noqa 239 | with self._ifnot(): 240 | self._digit_() 241 | with self._ifnot(): 242 | with self._group(): 243 | self._node_terminator_() 244 | self._first_identifier_char_() 245 | self.add_last_node_to_name('@') 246 | 247 | def block1(): 248 | self._rest_identifier_char_() 249 | self.add_last_node_to_name('@') 250 | self._closure(block1) 251 | 252 | @tatsumasu() 253 | def _digit_(self): # noqa 254 | self._pattern('[0-9]') 255 | 256 | @tatsumasu() 257 | def _first_identifier_char_(self): # noqa 258 | with self._ifnot(): 259 | self._linespace_() 260 | with self._ifnot(): 261 | self._pattern('[\\\\<{};\\[=,"]') 262 | self._pattern('.') 263 | 264 | @tatsumasu() 265 | def _rest_identifier_char_(self): # noqa 266 | with self._ifnot(): 267 | self._linespace_() 268 | with self._ifnot(): 269 | self._pattern('[\\\\;=,"]') 270 | self._pattern('.') 271 | 272 | @tatsumasu() 273 | def _prop_(self): # noqa 274 | self._identifier_() 275 | self.name_last_node('name') 276 | self._token('=') 277 | self._value_() 278 | self.name_last_node('value') 279 | self.ast._define( 280 | ['name', 'value'], 281 | [] 282 | ) 283 | 284 | @tatsumasu() 285 | def _value_(self): # noqa 286 | with self._choice(): 287 | with self._option(): 288 | self._symbol_() 289 | with self._option(): 290 | self._number_() 291 | with self._option(): 292 | self._string_() 293 | with self._option(): 294 | self._boolean_() 295 | with self._option(): 296 | self._null_() 297 | self._error('no available options') 298 | 299 | @tatsumasu() 300 | def _string_(self): # noqa 301 | with self._choice(): 302 | with self._option(): 303 | self._raw_string_() 304 | with self._option(): 305 | self._escaped_string_() 306 | self._error('no available options') 307 | 308 | @tatsumasu() 309 | def _escaped_string_(self): # noqa 310 | self._token('"') 311 | 312 | def block1(): 313 | self._character_() 314 | self._closure(block1) 315 | self.name_last_node('escstring') 316 | self._token('"') 317 | self.ast._define( 318 
| ['escstring'], 319 | [] 320 | ) 321 | 322 | @tatsumasu() 323 | def _character_(self): # noqa 324 | with self._choice(): 325 | with self._option(): 326 | self._token('\\') 327 | self._escape_() 328 | self.name_last_node('escape') 329 | with self._option(): 330 | self._pattern('[^"]') 331 | self.name_last_node('char') 332 | self._error('no available options') 333 | self.ast._define( 334 | ['char', 'escape'], 335 | [] 336 | ) 337 | 338 | @tatsumasu() 339 | def _escape_(self): # noqa 340 | with self._choice(): 341 | with self._option(): 342 | self._pattern('[\\\\\\/bfnrt]') 343 | self.name_last_node('named') 344 | with self._option(): 345 | self._token('u{') 346 | self._pattern('[0-9a-fA-F]{1,6}') 347 | self.name_last_node('unichar') 348 | self._token('}') 349 | self._error('no available options') 350 | self.ast._define( 351 | ['named', 'unichar'], 352 | [] 353 | ) 354 | 355 | @tatsumasu() 356 | def _raw_string_(self): # noqa 357 | self._token('r') 358 | self._raw_string_hash_() 359 | self.name_last_node('rawstring') 360 | self.ast._define( 361 | ['rawstring'], 362 | [] 363 | ) 364 | 365 | @tatsumasu() 366 | def _raw_string_hash_(self): # noqa 367 | with self._choice(): 368 | with self._option(): 369 | self._token('#') 370 | self._raw_string_hash_() 371 | self.name_last_node('@') 372 | self._token('#') 373 | with self._option(): 374 | self._raw_string_quotes_() 375 | self.name_last_node('@') 376 | self._error('no available options') 377 | 378 | @tatsumasu() 379 | def _raw_string_quotes_(self): # noqa 380 | self._token('"') 381 | self._pattern('[^"]*') 382 | self.name_last_node('@') 383 | self._token('"') 384 | 385 | @tatsumasu() 386 | def _symbol_(self): # noqa 387 | self._token(':') 388 | self._identifier_() 389 | self.name_last_node('symbol') 390 | self.ast._define( 391 | ['symbol'], 392 | [] 393 | ) 394 | 395 | @tatsumasu() 396 | def _number_(self): # noqa 397 | with self._choice(): 398 | with self._option(): 399 | self._hex_() 400 | with self._option(): 401 | 
self._octal_() 402 | with self._option(): 403 | self._binary_() 404 | with self._option(): 405 | self._decimal_() 406 | self._error('no available options') 407 | 408 | @tatsumasu() 409 | def _decimal_(self): # noqa 410 | self._pattern('[+\\-]?[0-9][0-9_]*(\\.[0-9][0-9_]*)?([eE][+-]?[0-9][0-9_]*)?') 411 | self.name_last_node('decimal') 412 | self.ast._define( 413 | ['decimal'], 414 | [] 415 | ) 416 | 417 | @tatsumasu() 418 | def _hex_(self): # noqa 419 | self._pattern('[+\\-]?0x[0-9a-fA-F][0-9a-fA-F_]*') 420 | self.name_last_node('hex') 421 | self.ast._define( 422 | ['hex'], 423 | [] 424 | ) 425 | 426 | @tatsumasu() 427 | def _octal_(self): # noqa 428 | self._pattern('[+\\-]?0o[0-7][0-7_]*') 429 | self.name_last_node('octal') 430 | self.ast._define( 431 | ['octal'], 432 | [] 433 | ) 434 | 435 | @tatsumasu() 436 | def _binary_(self): # noqa 437 | self._pattern('[+\\-]?0b[01][01_]*') 438 | self.name_last_node('binary') 439 | self.ast._define( 440 | ['binary'], 441 | [] 442 | ) 443 | 444 | @tatsumasu() 445 | def _boolean_(self): # noqa 446 | with self._group(): 447 | with self._choice(): 448 | with self._option(): 449 | self._token('true') 450 | with self._option(): 451 | self._token('false') 452 | self._error('no available options') 453 | self.name_last_node('boolean') 454 | self.ast._define( 455 | ['boolean'], 456 | [] 457 | ) 458 | 459 | @tatsumasu() 460 | def _null_(self): # noqa 461 | with self._group(): 462 | self._token('null') 463 | self.name_last_node('null') 464 | self.ast._define( 465 | ['null'], 466 | [] 467 | ) 468 | 469 | @tatsumasu() 470 | def _escline_(self): # noqa 471 | self._token('\\') 472 | 473 | def block0(): 474 | self._ws_() 475 | self._closure(block0) 476 | with self._group(): 477 | with self._choice(): 478 | with self._option(): 479 | self._single_line_comment_() 480 | with self._option(): 481 | self._newline_() 482 | self._error('no available options') 483 | 484 | @tatsumasu() 485 | def _linespace_(self): # noqa 486 | with self._choice(): 487 
| with self._option(): 488 | self._check_eof() 489 | with self._option(): 490 | self._newline_() 491 | with self._option(): 492 | self._ws_() 493 | with self._option(): 494 | self._single_line_comment_() 495 | self._error('no available options') 496 | 497 | @tatsumasu() 498 | def _single_line_comment_(self): # noqa 499 | self._token('//') 500 | 501 | def block0(): 502 | self._newline_() 503 | self._skip_to(block0) 504 | 505 | @tatsumasu() 506 | def _multi_line_comment_(self): # noqa 507 | self._token('/*') 508 | with self._group(): 509 | with self._choice(): 510 | with self._option(): 511 | self._commented_block_() 512 | with self._option(): 513 | self._multi_line_comment_() 514 | self._error('no available options') 515 | self._token('*/') 516 | 517 | @tatsumasu() 518 | def _commented_block_(self): # noqa 519 | 520 | def block0(): 521 | with self._choice(): 522 | with self._option(): 523 | self._token('*') 524 | self._pattern('[^\\/]') 525 | with self._option(): 526 | self._pattern('[^*]') 527 | self._error('no available options') 528 | self._closure(block0) 529 | 530 | @tatsumasu() 531 | def _newline_(self): # noqa 532 | self._pattern('(\\r\\n|[\\r\\n\\u0085\\u000C\\u2028\\u2029])') 533 | 534 | @tatsumasu() 535 | def _ws_(self): # noqa 536 | self._pattern('([\\t \\u00A0\\u1680\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200A\\u202F\\u205F\\u3000]|\\uFFEF)+') 537 | 538 | 539 | class KdlSemantics(object): 540 | def start(self, ast): # noqa 541 | return ast 542 | 543 | def nodes(self, ast): # noqa 544 | return ast 545 | 546 | def node(self, ast): # noqa 547 | return ast 548 | 549 | def node_props_and_args(self, ast): # noqa 550 | return ast 551 | 552 | def node_children(self, ast): # noqa 553 | return ast 554 | 555 | def node_space(self, ast): # noqa 556 | return ast 557 | 558 | def node_terminator(self, ast): # noqa 559 | return ast 560 | 561 | def identifier(self, ast): # noqa 562 | return ast 563 | 564 | def bare_identifier(self, ast): 
# noqa 565 | return ast 566 | 567 | def digit(self, ast): # noqa 568 | return ast 569 | 570 | def first_identifier_char(self, ast): # noqa 571 | return ast 572 | 573 | def rest_identifier_char(self, ast): # noqa 574 | return ast 575 | 576 | def prop(self, ast): # noqa 577 | return ast 578 | 579 | def value(self, ast): # noqa 580 | return ast 581 | 582 | def string(self, ast): # noqa 583 | return ast 584 | 585 | def escaped_string(self, ast): # noqa 586 | return ast 587 | 588 | def character(self, ast): # noqa 589 | return ast 590 | 591 | def escape(self, ast): # noqa 592 | return ast 593 | 594 | def raw_string(self, ast): # noqa 595 | return ast 596 | 597 | def raw_string_hash(self, ast): # noqa 598 | return ast 599 | 600 | def raw_string_quotes(self, ast): # noqa 601 | return ast 602 | 603 | def symbol(self, ast): # noqa 604 | return ast 605 | 606 | def number(self, ast): # noqa 607 | return ast 608 | 609 | def decimal(self, ast): # noqa 610 | return ast 611 | 612 | def hex(self, ast): # noqa 613 | return ast 614 | 615 | def octal(self, ast): # noqa 616 | return ast 617 | 618 | def binary(self, ast): # noqa 619 | return ast 620 | 621 | def boolean(self, ast): # noqa 622 | return ast 623 | 624 | def null(self, ast): # noqa 625 | return ast 626 | 627 | def escline(self, ast): # noqa 628 | return ast 629 | 630 | def linespace(self, ast): # noqa 631 | return ast 632 | 633 | def single_line_comment(self, ast): # noqa 634 | return ast 635 | 636 | def multi_line_comment(self, ast): # noqa 637 | return ast 638 | 639 | def commented_block(self, ast): # noqa 640 | return ast 641 | 642 | def newline(self, ast): # noqa 643 | return ast 644 | 645 | def ws(self, ast): # noqa 646 | return ast 647 | 648 | 649 | def main(filename, start=None, **kwargs): 650 | if start is None: 651 | start = 'start' 652 | if not filename or filename == '-': 653 | text = sys.stdin.read() 654 | else: 655 | with open(filename) as f: 656 | text = f.read() 657 | parser = KdlParser() 658 | return 
parser.parse(text, rule_name=start, filename=filename, **kwargs) 659 | 660 | 661 | if __name__ == '__main__': 662 | import json 663 | from tatsu.util import asjson 664 | 665 | ast = generic_main(main, KdlParser, name='Kdl') 666 | print('AST:') 667 | print(ast) 668 | print() 669 | print('JSON:') 670 | print(json.dumps(asjson(ast), indent=2)) 671 | print() 672 | -------------------------------------------------------------------------------- /kdl/grammar.tatsu: -------------------------------------------------------------------------------- 1 | @@grammar :: Kdl 2 | @@whitespace :: // 3 | 4 | start = {ws} @:nodes {ws} $; 5 | 6 | nodes = {linespace} {@+:node {linespace}}; 7 | node = [commented:'/-' {ws}] name:identifier {node_space props_and_args+:node_props_and_args} [{node_space} children:node_children {ws}] node_terminator; 8 | node_props_and_args = [commented:'/-' {ws}] (prop:prop | value:value); 9 | node_children = [commented:'/-' {ws}] '{' children:nodes '}'; 10 | node_space = ({ws} escline {ws}) | {ws}+; 11 | node_terminator = single_line_comment | newline | ';' | $; 12 | identifier = string:string | bare:bare_identifier; 13 | bare_identifier = !digit !(node_terminator ) @+:first_identifier_char {@+:rest_identifier_char}; 14 | digit = /[0-9]/; 15 | first_identifier_char = !linespace !/[\\<{};\[=,"]/ /./; 16 | rest_identifier_char = !linespace !/[\\;=,"]/ /./; 17 | prop = name:identifier '=' value:value; 18 | value = symbol | number | string | boolean | null; 19 | 20 | string = raw_string | escaped_string; 21 | escaped_string = '"' escstring:{character}* '"'; 22 | character = '\\' escape:escape | char:/[^"]/; 23 | escape = named:/[\\\/bfnrt]/ | 'u{' unichar:/[0-9a-fA-F]{1,6}/ '}'; 24 | 25 | raw_string = 'r' rawstring:raw_string_hash; 26 | raw_string_hash = '#' @:raw_string_hash '#' | @:raw_string_quotes; 27 | raw_string_quotes = '"' @:/[^"]*/ '"'; 28 | 29 | symbol = ':' symbol:identifier; 30 | 31 | number = hex | octal | binary | decimal; 32 | 33 | decimal = 
decimal:/[+\-]?[0-9][0-9_]*(\.[0-9][0-9_]*)?([eE][+-]?[0-9][0-9_]*)?/; 34 | hex = hex:/[+\-]?0x[0-9a-fA-F][0-9a-fA-F_]*/; 35 | octal = octal:/[+\-]?0o[0-7][0-7_]*/; 36 | binary = binary:/[+\-]?0b[01][01_]*/; 37 | 38 | boolean = boolean:('true' | 'false'); 39 | null = null:('null'); 40 | 41 | escline = '\' {ws} (single_line_comment | newline); 42 | 43 | linespace = $ | newline | ws | single_line_comment; 44 | 45 | single_line_comment = '//' ->newline; 46 | multi_line_comment = '/*' (commented_block | multi_line_comment) '*/'; 47 | commented_block = {'*' /[^\/]/ | /[^*]/}*; 48 | 49 | newline = /(\r\n|[\r\n\u0085\u000C\u2028\u2029])/; 50 | ws = /([\t \u00A0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u202F\u205F\u3000]|\uFFEF)+/; 51 | -------------------------------------------------------------------------------- /kdl/parser.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from .grammar import KdlParser 3 | import regex, sys 4 | 5 | if sys.version_info.major == 3: 6 | unicode = str 7 | unichr = chr 8 | 9 | model = KdlParser(whitespace='', parseinfo=False) 10 | 11 | namedEscapes = { 12 | '\\': '\\', 13 | '/': '/', 14 | 'r': '\r', 15 | 'n': '\n', 16 | 't': '\t', 17 | '"': '"', 18 | 'b': '\b', 19 | 'f': '\f', 20 | } 21 | namedEscapeInverse = {v : k for k, v in namedEscapes.items()} 22 | 23 | exists = lambda ast, name: ast is not None and name in ast and ast[name] is not None 24 | 25 | identRe = regex.compile(ur'^[^\\<{;\[=,"0-9\t \u00A0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFEF\r\n\u0085\u000C\u2028\u2029][^\\;=,"\t \u00A0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFEF\r\n\u0085\u000C\u2028\u2029]*$') 26 | def formatIdentifier(ident): 27 | if identRe.match(ident): 28 | return ident 29 | else: 30 | return formatString(ident) 31 | 32 | def formatString(val): 33 | if '\\' in val and '"' not in val: 34 | return u'r#"%s"#' % val 35 | return u'"%s"' % u''.join('\\' + 
namedEscapeInverse[c] if c in namedEscapeInverse else c for c in val) 36 | 37 | def formatValue(val): 38 | if isinstance(val, Symbol): 39 | return ':' + formatIdentifier(val.value) 40 | elif isinstance(val, str) or isinstance(val, unicode): 41 | return formatString(val) 42 | elif isinstance(val, bool): 43 | return 'true' if val else 'false' 44 | elif val is None: 45 | return 'null' 46 | else: 47 | return str(val) 48 | 49 | class Document(list): 50 | def __init__(self, document=None, preserve_property_order=False, symbols_as_strings=False): 51 | list.__init__(self) 52 | if document is not None: 53 | parse(document, preserve_property_order, symbols_as_strings, dlist=self) 54 | 55 | def __str__(self): 56 | return u'\n'.join(map(unicode, self)) 57 | 58 | class Node(object): 59 | def __init__(self, name, properties, arguments, children): 60 | self.name = name 61 | self.properties = properties 62 | self.arguments = arguments 63 | self.children = children 64 | 65 | def __str__(self): 66 | return self.format() 67 | 68 | def format(self, indent=False): 69 | fmt = formatIdentifier(self.name) 70 | if self.properties: 71 | for k, v in self.properties.items(): 72 | fmt += u' %s=%s' % (formatIdentifier(k), formatValue(v)) 73 | if self.arguments: 74 | for v in self.arguments: 75 | fmt += ' ' + formatValue(v) 76 | if self.children: 77 | fmt += ' {\n' 78 | for child in self.children: 79 | fmt += child.format(indent=True) + '\n' 80 | fmt += '}' 81 | return u'\n'.join('\t' + line for line in fmt.split('\n')) if indent else fmt 82 | 83 | def __repr__(self): 84 | return 'Node(name=%r%s%s%s)' % ( 85 | self.name, 86 | ', properties=%r' % self.properties if self.properties else '', 87 | ', arguments=%r' % self.arguments if self.arguments else '', 88 | ', children=%r' % self.children if self.children else '') 89 | 90 | def items(self): 91 | return self.properties.items() if self.properties else () 92 | 93 | def __iter__(self): 94 | if self.properties: 95 | for prop in 
self.properties.items(): 96 | yield prop 97 | if self.arguments: 98 | for arg in self.arguments: 99 | yield arg 100 | if self.children: 101 | for child in self.children: 102 | yield child 103 | 104 | def __getattr__(self, name): 105 | return self[name] 106 | 107 | def __getitem__(self, name): 108 | if isinstance(name, int): 109 | return self.arguments[name] 110 | else: 111 | return self.properties[name] 112 | 113 | class Symbol(object): 114 | def __init__(self, value): 115 | self.value = value 116 | 117 | def __repr__(self): 118 | return 'Symbol(%r)' % self.value 119 | 120 | def __str__(self): 121 | return ':%s' % self.value 122 | 123 | def __eq__(self, right): 124 | return (isinstance(right, Symbol) and right.value == self.value) or self.value == right 125 | 126 | def __ne__(self, right): 127 | return not (self == right) 128 | 129 | class Parser(object): 130 | def __init__(self, document, preserve_property_order, symbols_as_strings, dlist): 131 | self.preserve_property_order = preserve_property_order 132 | self.symbols_as_strings = symbols_as_strings 133 | 134 | if hasattr(document, 'read') and callable(document.read): 135 | document = document.read() 136 | if str is not unicode and isinstance(document, str): 137 | document = document.decode('utf-8') 138 | ast = model.parse(document) 139 | 140 | self.document = Document() if dlist is None else dlist 141 | self.document += self.parseNodes(ast) 142 | 143 | def parseNodes(self, ast): 144 | if ast[0] == [None] or (isinstance(ast[0], list) and len(ast[0]) > 0 and isinstance(ast[0][0], unicode)): 145 | # TODO: Figure out why empty documents are so strangely handled 146 | return [] 147 | nodes = map(self.parseNode, ast) 148 | return [node for node in nodes if node is not None] 149 | 150 | def parseNode(self, ast): 151 | if len(ast) == 0 or exists(ast, 'commented'): 152 | return 153 | name = self.parseIdentifier(ast['name']) 154 | children = props = args = None 155 | if exists(ast, 'props_and_args'): 156 | props, args = 
self.parsePropsAndArgs(ast['props_and_args']) 157 | if exists(ast, 'children') and not exists(ast['children'], 'commented'): 158 | children = self.parseNodes(ast['children']['children']) 159 | return Node(name, props, args, children) 160 | 161 | def parseIdentifier(self, ast): 162 | if exists(ast, 'bare'): 163 | return u''.join(ast['bare']) 164 | return self.parseString(ast['string']) 165 | 166 | def parsePropsAndArgs(self, ast): 167 | props = OrderedDict() if self.preserve_property_order else {} 168 | args = [] 169 | for elem in ast: 170 | if exists(elem, 'commented'): 171 | continue 172 | if exists(elem, 'prop'): 173 | props[self.parseIdentifier(elem['prop']['name'])] = self.parseValue(elem['prop']['value']) 174 | else: 175 | args.append(self.parseValue(elem['value'])) 176 | return props if len(props) else None, args if len(args) else None 177 | 178 | def parseValue(self, ast): 179 | if exists(ast, 'hex'): 180 | v = ast['hex'].replace('_', '') 181 | return int(v[0] + v[3:] if v[0] != '0' else v[2:], 16) 182 | elif exists(ast, 'octal'): 183 | v = ast['octal'].replace('_', '') 184 | return int(v[0] + v[3:] if v[0] != '0' else v[2:], 8) 185 | elif exists(ast, 'binary'): 186 | v = ast['binary'].replace('_', '') 187 | return int(v[0] + v[3:] if v[0] != '0' else v[2:], 2) 188 | elif exists(ast, 'decimal'): 189 | v = ast['decimal'].replace('_', '') 190 | if '.' in v or 'e' in v or 'E' in v: 191 | return float(v) 192 | else: 193 | return int(v) 194 | elif exists(ast, 'escstring') or exists(ast, 'rawstring'): 195 | return self.parseString(ast) 196 | elif exists(ast, 'symbol'): 197 | v = self.parseIdentifier(ast['symbol']) 198 | if self.symbols_as_strings: 199 | return v 200 | return Symbol(v) 201 | elif exists(ast, 'boolean'): 202 | return ast['boolean'] == 'true' 203 | elif exists(ast, 'null'): 204 | return None 205 | raise 'Unknown AST node! 
Internal failure: %r' % ast 206 | 207 | def parseString(self, ast): 208 | if exists(ast, 'escstring'): 209 | val = u'' 210 | for elem in ast['escstring']: 211 | if exists(elem, 'char'): 212 | val += elem['char'] 213 | elif exists(elem, 'escape'): 214 | esc = elem['escape'] 215 | if exists(esc, 'named'): 216 | val += namedEscapes[esc['named']] 217 | else: 218 | val += unichr(int(esc['unichar'], 16)) 219 | return val 220 | return ast['rawstring'] 221 | 222 | def parse(document, preserve_property_order=False, symbols_as_strings=False, dlist=None): 223 | parser = Parser(document, preserve_property_order, symbols_as_strings, dlist) 224 | return parser.document 225 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | TatSu>=4.4.0 2 | regex>=2021.4.4 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import os.path, sys 3 | 4 | here = os.path.dirname(os.path.abspath(__file__)) 5 | 6 | README = open(os.path.join(here, 'README.md'), 'r').read() 7 | if sys.version_info.major == 2: 8 | README = README.decode('utf-8') 9 | 10 | # This call to setup() does all the work 11 | setup( 12 | name='kdl-py', 13 | version='0.1.5', 14 | description='A Python library for the KDL Document Language.', 15 | long_description=README, 16 | long_description_content_type='text/markdown', 17 | url='https://github.com/daeken/kdl-py', 18 | author='Sera Brocious', 19 | author_email='sera.brocious@gmail.com', 20 | license='MIT', 21 | classifiers=[ 22 | 'License :: OSI Approved :: MIT License', 23 | 'Programming Language :: Python :: 3', 24 | 'Programming Language :: Python :: 2.7', 25 | ], 26 | packages=['kdl'], 27 | include_package_data=True, 28 | install_requires=['TatSu >= 4.4.0', 'regex >= 2021.4.4'], 29 | 
) -------------------------------------------------------------------------------- /tests/complex.kdl: -------------------------------------------------------------------------------- 1 | omgwtfhax 2 | contents { 3 | section "First section" { 4 | paragraph name="This is the first paragraph" 5 | paragraph "This is the second paragraph 6 | with internal newline\tand an escape 7 | and a unicode escape \u{9}" 8 | } 9 | } 10 | "dpsojf" 0b1_01 11 | r#"123"# 12 | foo "bar" 0x123 :hax r#"foobar\podasjfpoj"# 123 123.4 123e-3 13 | -------------------------------------------------------------------------------- /tests/complex_formatted.kdl: -------------------------------------------------------------------------------- 1 | omgwtfhax 2 | contents { 3 | section "First section" { 4 | paragraph name="This is the first paragraph" 5 | paragraph "This is the second paragraph\nwith internal newline\tand an escape\nand a unicode escape \t" 6 | } 7 | } 8 | dpsojf 5 9 | "123" 10 | foo "bar" 291 :hax r#"foobar\podasjfpoj"# 123 123.4 0.123 -------------------------------------------------------------------------------- /tests/test_complex.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 4 | 5 | from kdl import parse 6 | 7 | def test_from_file(): 8 | with open('complex.kdl', 'r') as fp: 9 | doc = parse(fp) 10 | if sys.version_info.major == 3: 11 | with open('complex_formatted.kdl', 'r', encoding='utf-8') as fp: 12 | assert fp.read() == str(doc) 13 | else: 14 | with open('complex_formatted.kdl', 'r') as fp: 15 | assert fp.read().decode('utf-8') == str(doc) 16 | -------------------------------------------------------------------------------- /tests/test_solo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | import sys 5 | sys.path.insert(0, 
def test_bare_int_arg():
    """A node followed by a decimal integer: one item, equal to 123."""
    document = parse('bare 123')
    assert len(document) == 1

    bare = document[0]
    assert bare.name == 'bare'

    items = list(bare)
    assert len(items) == 1
    assert bare[0] == 123

    # Formatting the document reproduces the source text.
    assert str(document) == 'bare 123'
def test_bare_true_arg():
    """The keyword ``true`` parses to the boolean ``True`` and formats back as ``true``."""
    doc = parse('bare true')
    assert len(doc) == 1
    node = doc[0]
    assert node.name == 'bare'
    assert len(list(node)) == 1
    # Identity check: ``== True`` would also accept the integer 1.
    assert node[0] is True
    assert str(doc) == 'bare true'

def test_bare_false_arg():
    """The keyword ``false`` parses to the boolean ``False`` and formats back as ``false``."""
    doc = parse('bare false')
    assert len(doc) == 1
    node = doc[0]
    assert node.name == 'bare'
    assert len(list(node)) == 1
    # Identity check: ``== False`` would also accept the integer 0.
    assert node[0] is False
    assert str(doc) == 'bare false'
def test_bare_plain_symbol():
    """Bare, quoted and raw-string spellings of ``:foo`` are all expected to format as ``:foo``."""
    # str() must wrap only the parsed document.  Wrapping the whole
    # comparison produces 'True' or 'False', both non-empty strings, so the
    # assertion could never fail.
    assert str(parse('bare :foo')) == 'bare :foo'
    assert str(parse('bare :"foo"')) == 'bare :foo'
    assert str(parse('bare :r#"foo"#')) == 'bare :foo'
def test_short_identifier():
    """A single-character node name round-trips unchanged."""
    # Compare the formatted document, not str() of the comparison result
    # (which is always a truthy string).
    assert str(parse('T')) == 'T'

def test_messy_identifiers():
    """Identifiers and symbols containing ``:``, ``[``, ``]`` and ``.`` round-trip unchanged."""
    # Each str() wraps only the parsed document so that a formatting
    # mismatch actually fails the assertion.
    assert str(parse('struct :Mod')) == 'struct :Mod'
    assert str(parse('stringref[:numFiles] :Files')) == 'stringref[:numFiles] :Files'
    assert str(parse('Placeable[:numPlaceables] :Placeables')) == 'Placeable[:numPlaceables] :Placeables'
    assert str(parse('foo :obj:stringTable[:index...]')) == 'foo :obj:stringTable[:index...]'
def test_empty_children():
    """An empty children block, with or without inner whitespace, has no children and formats as the bare node."""
    for source in ('foo { }', 'foo {}'):
        parsed = parse(source)
        assert len(parsed[0].children) == 0
        assert str(parsed) == 'foo'