├── .coveragerc ├── .gitignore ├── .travis.install ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── NEWS ├── README ├── README.rst ├── bin ├── generate_parser ├── move_stage ├── remove_stage └── stage ├── doc ├── .gitignore ├── Makefile ├── _static │ └── listings │ │ ├── tutorial3-netstring-reversal.py │ │ ├── tutorial3-netstrings.py │ │ └── tutorial3-netstrings2.py ├── calc.py ├── community.rst ├── conf.py ├── extending.rst ├── index.rst ├── reference.rst ├── terml.rst ├── test_calc.py ├── tutorial.rst ├── tutorial2.rst └── tutorial3.rst ├── examples ├── 337141-steamcube.json ├── exceptions.py ├── iso8601.py ├── minml.py ├── parsley_json.py ├── protocol │ ├── netstring_reversal.py │ ├── netstrings.py │ └── test_netstrings.py ├── test_iso8601.py ├── test_parsley_json.py ├── trace_json.py └── trace_visualiser.py ├── ometa ├── __init__.py ├── _generated │ ├── __init__.py │ ├── parsley.py │ ├── parsley_termactions.py │ ├── parsley_tree_transformer.py │ ├── pymeta_v1.py │ ├── vm.py │ └── vm_emit.py ├── builder.py ├── compat.py ├── grammar.py ├── interp.py ├── parsley.parsley ├── parsley_termactions.parsley ├── parsley_tree_transformer.parsley ├── protocol.py ├── pymeta_v1.parsley ├── runtime.py ├── test │ ├── __init__.py │ ├── helpers.py │ ├── test_builder.py │ ├── test_protocol.py │ ├── test_pymeta.py │ ├── test_runtime.py │ ├── test_tube.py │ └── test_vm_builder.py ├── tube.py ├── vm.parsley ├── vm_builder.py └── vm_emit.parsley ├── parsley.py ├── setup.py ├── terml ├── README.txt ├── __init__.py ├── _generated │ ├── __init__.py │ ├── quasiterm.py │ └── terml.py ├── nodes.py ├── parser.py ├── qnodes.py ├── quasiterm.parsley ├── quasiterm.py ├── terml.parsley └── test │ ├── __init__.py │ ├── test_quasiterm.py │ └── test_terml.py └── test_parsley.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = 3 | ometa 4 | parsley.py 5 | examples/parsley_json.py 6 | examples/protocol/netstrings.py 7 | omit = 8 | 
ometa/_generated/* 9 | *test*.py 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | /_trial_temp 3 | /.coverage 4 | /htmlcov 5 | /build 6 | /dist 7 | -------------------------------------------------------------------------------- /.travis.install: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PYTHON_VERSION=$(python --version 2>&1) 4 | 5 | if [[ "$PYTHON_VERSION" > "Python 3" ]]; then 6 | true 7 | else 8 | pip install twisted 9 | fi 10 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.6" 4 | - "2.7" 5 | - "3.3" 6 | 7 | install: 8 | - "bash .travis.install" 9 | - "pip install pytest coveralls pytz ." 10 | script: 11 | - "coverage run $(which py.test) ." 12 | after_success: 13 | - "coveralls" 14 | 15 | notifications: 16 | email: false 17 | irc: 18 | channels: 19 | - chat.freenode.org#parsley 20 | on_success: always 21 | on_failure: always 22 | use_notice: true 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2008-2012 2 | Allen Short 3 | Waldemar Kornewald 4 | 5 | Soli Deo Gloria. 
6 | 7 | Permission is hereby granted, free of charge, to any person obtaining 8 | a copy of this software and associated documentation files (the 9 | "Software"), to deal in the Software without restriction, including 10 | without limitation the rights to use, copy, modify, merge, publish, 11 | distribute, sublicense, and/or sell copies of the Software, and to 12 | permit persons to whom the Software is furnished to do so, subject to 13 | the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be 16 | included in all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 22 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README 2 | include LICENSE 3 | include NEWS 4 | include terml/README.txt 5 | graft examples 6 | graft bin 7 | graft doc 8 | global-exclude *.pyc 9 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | 1.3.0 (2015-09-08): 2 | - Added python 3 support. 3 | - Added an example of parsing ISO 8601. 4 | - Support rule arguments in parser protocols. 5 | 6 | 1.2.0 (2013-08-01): 7 | - Added 'makeProtocol' for parsing stream protocols using Parsley 8 | grammars and Twisted, with examples, docs, and a tutorial. 9 | - Added 'stack' as support for 'makeProtocol'. 
10 | - Sped up parsing; started compiling python expressons only once. 11 | (thanks to eevee!) 12 | - Allowed tuple unpacking in assignment in rules. For example, 13 | `rule1 = rule2:(a, b) -> a + b`. (thanks to introom!) 14 | - Added a trace hook and an example of a trace visualizer using it. 15 | - Fixed an example in the README. (thanks to ilmarga!) 16 | - Added support for custom labels in grammars. For example, 17 | `rule1 = rule2 ^(foo)` will make Parsley consider 'foo' the name for 18 | 'rule2' in error messages. (thanks to miebster!) 19 | 20 | 1.1.0 (2013-03-03): 21 | - The 'spaces' rule has been provided under the alias 'ws'; 'spaces' 22 | is now the deprecated name. 23 | - Single quotes can now be used around multiple characters to match 24 | them consecutively. For example, 'abc' is equivalent 'a' 'b' 'c'. 25 | - The 'token' rule is thus deprecated as well, since "token('foo')" 26 | is more typing than "ws 'foo'". 27 | - Hex escapes can now be used in literals for matching bytes. 28 | - 'makeGrammar' now takes an 'unwrap' argument, to get a grammar 29 | class suitable for subclassing. 30 | - 'makeGrammar' also takes an 'extends' argument that can be either a 31 | regular python class or a wrapped Parsley grammar, for simple 32 | subclassing. 33 | - 'repeat' expressions now work for 0 repetitions properly. 34 | - ometa.runtime.TIMING flag added for enabling debug output of 35 | grammar load times. 36 | - OMeta version 1 parser moved to ometa.compat. 37 | - Twine removed. 38 | - Various bugfixes around error reporting. 39 | - Parsley no longer loads its internal grammars at import time. The 40 | 'stage' script in bin/ is used to regenerate the compiled versions 41 | after modification. 42 | - Experimental tree transformer grammars, with special syntax for 43 | destructuring terms and creating string templates, have been added. 44 | 45 | 1.0.0 (2012-10-01): 46 | - PyMeta is now Parsley. 47 | - A new public API has been added in the 'parsley' module. 
48 | - Grammars now use the OMeta 2 syntax by default. Compatibility is 49 | available via ometa.grammar.OMeta1. 50 | - A generic AST-handling library has been added, 'terml'. 51 | - Grammars now parse to term trees instead of nested lists. The code 52 | generator has accordingly been greatly simplified. 53 | - A grammar interpreter has been added, which parses by evaluating 54 | the grammar term tree directly instead of generating a class. 55 | - A push parser has been added which accepts data in arbitrary 56 | chunks, maintaining state while waiting for more input. 57 | 58 | 0.4.0 (2010-05-15): 59 | - Builders now work from a grammar AST, rather than being driven by 60 | the grammar parser directly. 61 | - All parse operations now carry error information, including what 62 | was expected to be parsed and where. 63 | * Consequently, ParseError now has a 'formatError' method that takes 64 | the input string and returns a human-readable description the 65 | parse failure. 66 | - New example: TermL parser, in examples/terml. 67 | - New script, bin/generate_parser. Takes a file containing a PyMeta 68 | grammar as input and writes a Python module file. 69 | 70 | - A couple bugfixes: 71 | #248643 subclassed grammars don't inherit base namespace 72 | #564135 makeGrammar error handling for nonsensical grammars is broken 73 | 74 | 75 | 0.3.0 (2008-07-12): 76 | - Input stream now implemented like the Javascript version, with immutable 77 | position objects. 78 | 79 | 0.2.0 (2008-04-17): 80 | - Grammars generate Python source instead of ASTs. 81 | - OMeta now has a "metagrammarClass" attribute, to ease extension of 82 | the metagrammar. 83 | 84 | 0.1.1 (2008-03-27): 85 | - Fixed a bug in 'super'. 86 | 87 | 0.1 (2008-03-26): 88 | - Initial release. 89 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | .. 
-*- mode: rst -*- 2 | 3 | =============================================================== 4 | Parsley: A Pattern-Matching Language Based on OMeta and Python 5 | =============================================================== 6 | 7 | You can read further docs at: http://parsley.readthedocs.org/en/latest/ 8 | 9 | Summary 10 | ------- 11 | 12 | Parsley is a parsing library for people who find parsers scary or 13 | annoying. I wrote it because I wanted to parse a programming language, 14 | and tools like PLY or ANTLR or Bison were very hard to understand and 15 | integrate into my Python code. Most parser generators are based on LL 16 | or LR parsing algorithms that compile to big state machine 17 | tables. It was like I had to wake up a different section of my brain 18 | to understand or work on grammar rules. 19 | 20 | Parsley, like pyparsing and ZestyParser, uses the PEG algorithm, so 21 | each expression in the grammar rules works like a Python 22 | expression. In particular, alternatives are evaluated in order, unlike 23 | table-driven parsers such as yacc, bison or PLY. 24 | 25 | Parsley is an implementation of OMeta, an object-oriented 26 | pattern-matching language developed by Alessandro Warth at 27 | http://tinlizzie.org/ometa/ . For further reading, see Warth's PhD 28 | thesis, which provides a detailed description of OMeta: 29 | http://www.vpri.org/pdf/tr2008003_experimenting.pdf 30 | 31 | How It Works 32 | ------------ 33 | 34 | Parsley compiles a grammar to a Python class, with the rules as methods. The 35 | rules specify parsing expressions, which consume input and return values if 36 | they succeed in matching. 37 | 38 | Basic syntax 39 | ~~~~~~~~~~~~ 40 | ``foo = ....``: 41 | Define a rule named foo. 42 | 43 | ``expr1 expr2``: 44 | Match expr1, and then match expr2 if it succeeds, returning the value of 45 | expr2. Like Python's ``and``. 46 | 47 | ``expr1 | expr2``: 48 | Try to match ``expr1`` --- if it fails, match ``expr2`` instead. 
Like Python's 49 | ``or``. 50 | 51 | ``expr*``: 52 | Match ``expr`` zero or more times, returning a list of matches. 53 | 54 | ``expr+``: 55 | Match ``expr`` one or more times, returning a list of matches. 56 | 57 | ``expr?``: 58 | Try to match ``expr``. Returns ``None`` if it fails to match. 59 | 60 | ``expr{n, m}``: 61 | Match ``expr`` at least ``n`` times, and no more than ``m`` times. 62 | 63 | ``expr{n}``: 64 | Match ``expr`` ``n`` times exactly. 65 | 66 | ``~expr``: 67 | Negative lookahead. Fails if the next item in the input matches 68 | ``expr``. Consumes no input. 69 | 70 | ``~~expr``: 71 | Positive lookahead. Fails if the next item in the input does *not* 72 | match ``expr``. Consumes no input. 73 | 74 | ``ruleName`` or ``ruleName(arg1 arg2 etc)``: 75 | Call the rule ``ruleName``, possibly with args. 76 | 77 | ``'x'``: 78 | Match the literal character 'x'. 79 | 80 | ````: 81 | Returns the string consumed by matching ``expr``. Good for tokenizing rules. 82 | 83 | ``expr:name``: 84 | Bind the result of expr to the local variable ``name``. 85 | 86 | ``-> pythonExpression``: 87 | Evaluate the given Python expression and return its result. Can be 88 | used inside parentheses too! 89 | 90 | ``!(pythonExpression)``: 91 | Invoke a Python expression as an action. 92 | 93 | ``?(pythonExpression)``: 94 | Fail if the Python expression is false, Returns True otherwise. 95 | 96 | Comments like Python comments are supported as well, starting with # 97 | and extending to the end of the line. 98 | 99 | Interface 100 | --------- 101 | 102 | The starting point for defining a new grammar is 103 | ``parsley.makeGrammar(grammarSource, bindings)``, which takes a grammar 104 | definition and a dict of variable bindings for its embedded 105 | expressions and produces a Python class. Grammars can be subclassed as 106 | usual, and makeGrammar can be called on these classes to override 107 | rules and provide new ones. Grammar rules are exposed as methods. 
108 | 109 | Example Usage 110 | ------------- 111 | 112 | :: 113 | 114 | from parsley import makeGrammar 115 | exampleGrammar = """ 116 | ones = '1' '1' -> 1 117 | twos = '2' '2' -> 2 118 | stuff = (ones | twos)+ 119 | """ 120 | Example = makeGrammar(exampleGrammar, {}) 121 | g = Example("11221111") 122 | result = g.stuff() 123 | print result 124 | 125 | → ``[1, 2, 1, 1]`` 126 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | README -------------------------------------------------------------------------------- /bin/generate_parser: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- mode: python -*- 3 | 4 | import sys 5 | from ometa.runtime import ParseError 6 | from ometa.grammar import OMeta 7 | from ometa.builder import writePython 8 | from ometa.vm_builder import writeBytecode, bytecodeToPython 9 | 10 | if len(sys.argv) != 3: 11 | print "Usage: generate_grammar grammar-filename python-filename" 12 | sys.exit(1) 13 | 14 | 15 | def findObject(name): 16 | classSplit = name.split('.') 17 | modname = '.'.join(classSplit[:-1]) 18 | topLevel = __import__(modname) 19 | packages = modname.split(".")[1:] 20 | module = topLevel 21 | for p in packages: 22 | module = getattr(module, p) 23 | return getattr(module, classSplit[-1]) 24 | 25 | grammar = findObject(sys.argv[1]) 26 | pythonFile = open(sys.argv[2], 'w') 27 | g = OMeta(grammar) 28 | tree = g.parseGrammar("Parser") 29 | # source = writePython(tree) 30 | bytecode = writeBytecode(tree) 31 | source = bytecodeToPython(bytecode) 32 | pythonFile.write("from ometa.runtime import OMetaBase as GrammarBase\n") 33 | pythonFile.write(source) 34 | -------------------------------------------------------------------------------- /bin/move_stage: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python 2 | # -*- mode: python -*- 3 | import os 4 | import shutil 5 | 6 | 7 | def main(): 8 | 9 | bin_dir = os.path.dirname(os.path.realpath(__file__)) 10 | parsley_dir = os.path.split(bin_dir)[0] 11 | stage_dir = os.path.join(parsley_dir, 'stage') 12 | if not os.path.exists(stage_dir): 13 | raise Exception('Stage dir does not exist') 14 | 15 | move_pkg_gen(parsley_dir, stage_dir, 'ometa') 16 | move_pkg_gen(parsley_dir, stage_dir, 'terml') 17 | 18 | 19 | def move_pkg_gen(parsley_dir, stage_dir, pkg): 20 | print "\nMoving " + pkg 21 | stage_gen_dir = os.path.join(stage_dir, pkg, '_generated') 22 | gen_dir = os.path.join(parsley_dir, pkg, '_generated') 23 | 24 | if os.path.exists(gen_dir): 25 | print "Removing " + os.path.relpath(gen_dir, parsley_dir) 26 | shutil.rmtree(gen_dir) 27 | 28 | print 'Copying ' + os.path.relpath(stage_gen_dir, parsley_dir) + ' to ' + os.path.relpath(gen_dir, parsley_dir) 29 | shutil.copytree(stage_gen_dir, gen_dir) 30 | 31 | 32 | if __name__ == '__main__': 33 | main() 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /bin/remove_stage: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- mode: python -*- 3 | import os 4 | import shutil 5 | 6 | 7 | def main(): 8 | 9 | bin_dir = os.path.dirname(os.path.realpath(__file__)) 10 | parsley_dir = os.path.split(bin_dir)[0] 11 | stage_dir = os.path.join(parsley_dir, 'stage') 12 | if os.path.exists(stage_dir): 13 | print 'Removing stage dir' 14 | shutil.rmtree(stage_dir) 15 | 16 | if __name__ == '__main__': 17 | main() 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /bin/stage: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- mode: python -*- 3 | import sys 4 | import os 5 | 
import glob 6 | import shutil 7 | from ometa.runtime import ParseError 8 | from ometa.grammar import OMeta, TreeTransformerGrammar 9 | from ometa.builder import writePython 10 | 11 | 12 | def main(): 13 | bin_dir = os.path.dirname(os.path.realpath(__file__)) 14 | parsley_dir = os.path.split(bin_dir)[0] 15 | stage_dir = os.path.join(parsley_dir, 'stage') 16 | sys.path.append(parsley_dir) 17 | 18 | if os.path.exists(stage_dir): 19 | print "Removing stage dir" 20 | shutil.rmtree(stage_dir) 21 | 22 | os.mkdir(stage_dir) 23 | stage_pkg(parsley_dir, stage_dir, 'ometa') 24 | stage_pkg(parsley_dir, stage_dir, 'terml') 25 | 26 | 27 | def stage_pkg(parsley_dir, stage_dir, pkg): 28 | print "\nCopying", pkg 29 | stage_pkg_dir = os.path.join(stage_dir, pkg) 30 | stage_gen_dir = os.path.join(stage_pkg_dir, '_generated') 31 | pkg_dir = os.path.join(parsley_dir, pkg) 32 | 33 | shutil.copytree(pkg_dir, stage_pkg_dir, 34 | ignore=lambda src, names: [n for n in names if n.endswith('pyc')]) 35 | for filename in glob.glob(pkg_dir + "/*.parsley"): 36 | grammar = open(filename).read() 37 | grammarname = os.path.split(filename)[1].split('.')[0] 38 | pyfn = os.path.join(stage_gen_dir, grammarname + '.py') 39 | print "{src:38} => {dst}".format(src=os.path.relpath(filename, parsley_dir), 40 | dst=os.path.relpath(pyfn, parsley_dir)) 41 | if grammar.startswith('#TreeTransformer'): 42 | g = TreeTransformerGrammar(grammar) 43 | else: 44 | g = OMeta(grammar) 45 | tree = g.parseGrammar(grammarname) 46 | source = writePython(tree, grammar) 47 | pythonFile = open(pyfn, 'w') 48 | pythonFile.write(source) 49 | 50 | 51 | if __name__ == '__main__': 52 | main() 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /doc/.gitignore: -------------------------------------------------------------------------------- 1 | /_build 2 | -------------------------------------------------------------------------------- 
/doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf 
$(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Parsley.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Parsley.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Parsley" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Parsley" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 
97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 
143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 154 | -------------------------------------------------------------------------------- /doc/_static/listings/tutorial3-netstring-reversal.py: -------------------------------------------------------------------------------- 1 | from twisted.internet.defer import Deferred 2 | from twisted.internet.endpoints import TCP4ServerEndpoint 3 | from twisted.internet.protocol import Factory 4 | from twisted.internet.task import react 5 | 6 | from parsley import makeProtocol, stack 7 | 8 | 9 | grammar = """ 10 | nonzeroDigit = digit:x ?(x != '0') 11 | digits = <'0' | nonzeroDigit digit*>:i -> int(i) 12 | 13 | netstring = digits:length ':' :string ',' -> string 14 | 15 | receiveNetstring = netstring:string -> receiver.netstringReceived(string) 16 | """ 17 | 18 | 19 | class NetstringReversalWrapper(object): 20 | def __init__(self, wrapped): 21 | self.wrapped = wrapped 22 | 23 | def sendNetstring(self, string): 24 | self.wrapped.sendNetstring(string[::-1]) 25 | 26 | 27 | class NetstringSender(object): 28 | def __init__(self, transport): 29 | self.transport = transport 30 | 31 | def sendNetstring(self, string): 32 | self.transport.write('%d:%s,' % (len(string), string)) 33 | 34 | 35 | class NetstringSplittingWrapper(object): 36 | def __init__(self, wrapped): 37 | self.wrapped = wrapped 38 | 39 | def netstringReceived(self, string): 40 | splitpoint = len(string) // 2 41 | self.wrapped.netstringFirstHalfReceived(string[:splitpoint]) 42 | self.wrapped.netstringSecondHalfReceived(string[splitpoint:]) 43 | 44 | def __getattr__(self, attr): 45 | 
return getattr(self.wrapped, attr) 46 | 47 | 48 | class SplitNetstringReceiver(object): 49 | currentRule = 'receiveNetstring' 50 | 51 | def __init__(self, sender): 52 | self.sender = sender 53 | 54 | def prepareParsing(self, parser): 55 | pass 56 | 57 | def finishParsing(self, reason): 58 | pass 59 | 60 | def netstringFirstHalfReceived(self, string): 61 | self.sender.sendNetstring(string) 62 | 63 | def netstringSecondHalfReceived(self, string): 64 | pass 65 | 66 | pass # begin protocol definition 67 | NetstringProtocol = makeProtocol( 68 | grammar, 69 | stack(NetstringReversalWrapper, NetstringSender), 70 | stack(NetstringSplittingWrapper, SplitNetstringReceiver)) 71 | 72 | class NetstringFactory(Factory): 73 | protocol = NetstringProtocol 74 | 75 | 76 | def main(reactor): 77 | server = TCP4ServerEndpoint(reactor, 1234) 78 | d = server.listen(NetstringFactory()) 79 | d.addCallback(lambda p: Deferred()) # listen forever 80 | return d 81 | 82 | 83 | react(main, []) 84 | -------------------------------------------------------------------------------- /doc/_static/listings/tutorial3-netstrings.py: -------------------------------------------------------------------------------- 1 | from twisted.internet.defer import Deferred 2 | from twisted.internet.endpoints import TCP4ServerEndpoint 3 | from twisted.internet.protocol import Factory 4 | from twisted.internet.task import react 5 | 6 | from parsley import makeProtocol 7 | 8 | 9 | grammar = """ 10 | nonzeroDigit = digit:x ?(x != '0') 11 | digits = <'0' | nonzeroDigit digit*>:i -> int(i) 12 | 13 | netstring = digits:length ':' :string ',' -> string 14 | receiveNetstring = netstring:string -> receiver.netstringReceived(string) 15 | """ 16 | 17 | 18 | class NetstringSender(object): 19 | def __init__(self, transport): 20 | self.transport = transport 21 | 22 | def sendNetstring(self, string): 23 | self.transport.write('%d:%s,' % (len(string), string)) 24 | 25 | 26 | class NetstringReceiver(object): 27 | currentRule = 
'receiveNetstring' 28 | 29 | def __init__(self, sender): 30 | self.sender = sender 31 | 32 | def prepareParsing(self, parser): 33 | pass 34 | 35 | def finishParsing(self, reason): 36 | pass 37 | 38 | def netstringReceived(self, string): 39 | self.sender.sendNetstring(string) 40 | 41 | 42 | NetstringProtocol = makeProtocol( 43 | grammar, NetstringSender, NetstringReceiver) 44 | 45 | 46 | class NetstringFactory(Factory): 47 | protocol = NetstringProtocol 48 | 49 | 50 | def main(reactor): 51 | server = TCP4ServerEndpoint(reactor, 1234) 52 | d = server.listen(NetstringFactory()) 53 | d.addCallback(lambda p: Deferred()) # listen forever 54 | return d 55 | 56 | 57 | react(main, []) 58 | -------------------------------------------------------------------------------- /doc/_static/listings/tutorial3-netstrings2.py: -------------------------------------------------------------------------------- 1 | from twisted.internet.defer import Deferred 2 | from twisted.internet.endpoints import TCP4ServerEndpoint 3 | from twisted.internet.protocol import Factory 4 | from twisted.internet.task import react 5 | 6 | from parsley import makeProtocol 7 | 8 | 9 | grammar = """ 10 | nonzeroDigit = digit:x ?(x != '0') 11 | digits = <'0' | nonzeroDigit digit*>:i -> int(i) 12 | netstring :delimiter = digits:length delimiter :string ',' -> string 13 | 14 | colon = digits:length ':' :string ',' -> receiver.netstringReceived(':', string) 15 | semicolon = digits:length ';' :string ',' -> receiver.netstringReceived(';', string) 16 | """ 17 | 18 | 19 | class NetstringSender(object): 20 | def __init__(self, transport): 21 | self.transport = transport 22 | 23 | def sendNetstring(self, string): 24 | print 'received', repr(string) 25 | 26 | 27 | class NetstringReceiver(object): 28 | currentRule = 'colon' 29 | 30 | def __init__(self, sender): 31 | self.sender = sender 32 | 33 | def prepareParsing(self, parser): 34 | pass 35 | 36 | def finishParsing(self, reason): 37 | reason.printTraceback() 38 | 39 | 
def netstringReceived(self, delimiter, string): 40 | self.sender.sendNetstring(string) 41 | if delimiter == ':': 42 | self.currentRule = 'semicolon' 43 | else: 44 | self.currentRule = 'colon' 45 | 46 | 47 | NetstringProtocol = makeProtocol( 48 | grammar, NetstringSender, NetstringReceiver) 49 | 50 | 51 | class NetstringFactory(Factory): 52 | protocol = NetstringProtocol 53 | 54 | 55 | def main(reactor): 56 | server = TCP4ServerEndpoint(reactor, 1234) 57 | d = server.listen(NetstringFactory()) 58 | d.addCallback(lambda p: Deferred()) # listen forever 59 | return d 60 | 61 | 62 | react(main, []) 63 | -------------------------------------------------------------------------------- /doc/calc.py: -------------------------------------------------------------------------------- 1 | import math 2 | from parsley import makeGrammar 3 | 4 | def calculate(start, pairs): 5 | result = start 6 | for op, value in pairs: 7 | if op == '+': 8 | result += value 9 | elif op == '-': 10 | result -= value 11 | elif op == '*': 12 | result *= value 13 | elif op == '/': 14 | result /= value 15 | return result 16 | 17 | calcGrammar = """ 18 | number = :ds -> int(ds) 19 | parens = '(' ws expr:e ws ')' -> e 20 | value = number | parens 21 | ws = ' '* 22 | add = '+' ws expr2:n -> ('+', n) 23 | sub = '-' ws expr2:n -> ('-', n) 24 | mul = '*' ws value:n -> ('*', n) 25 | div = '/' ws value:n -> ('/', n) 26 | 27 | addsub = ws (add | sub) 28 | muldiv = ws (mul | div) 29 | 30 | expr = expr2:left addsub*:right -> calculate(left, right) 31 | expr2 = value:left muldiv*:right -> calculate(left, right) 32 | """ 33 | 34 | Calc = makeGrammar(calcGrammar, {"calculate": calculate}, name="Calc") 35 | 36 | calcGrammarEx = """ 37 | value = super | constant 38 | constant = 'pi' -> math.pi 39 | | 'e' -> math.e 40 | """ 41 | CalcEx = makeGrammar(calcGrammarEx, {"math": math}, name="CalcEx", 42 | extends=Calc) 43 | -------------------------------------------------------------------------------- /doc/community.rst: 
-------------------------------------------------------------------------------- 1 | ========= 2 | Community 3 | ========= 4 | 5 | You can participate in Parsley development in a couple of places: 6 | 7 | * `Source code`_ 8 | * `Issue tracker`_ 9 | * `Documentation`_ 10 | 11 | Wherever we interact, we adhere to the `Contributor Covenant`_. 12 | 13 | .. _`Source code`: https://github.com/pyga/parsley 14 | .. _`Issue tracker`: https://github.com/pyga/parsley/issues 15 | .. _`Documentation`: https://parsley.readthedocs.io/ 16 | .. _`Contributor Covenant`: https://pyga.github.io/ 17 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Parsley documentation build configuration file, created by 4 | # sphinx-quickstart on Tue Aug 21 09:46:24 2012. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | #sys.path.insert(0, os.path.abspath('.')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # If your documentation needs a minimal Sphinx version, state it here. 24 | #needs_sphinx = '1.0' 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be extensions 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
28 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] 29 | 30 | # Add any paths that contain templates here, relative to this directory. 31 | templates_path = ['_templates'] 32 | 33 | # The suffix of source filenames. 34 | source_suffix = '.rst' 35 | 36 | # The encoding of source files. 37 | #source_encoding = 'utf-8-sig' 38 | 39 | # The master toctree document. 40 | master_doc = 'index' 41 | 42 | # General information about the project. 43 | project = u'Parsley' 44 | copyright = u'2013, Allen Short' 45 | 46 | # The version info for the project you're documenting, acts as replacement for 47 | # |version| and |release|, also used in various other places throughout the 48 | # built documents. 49 | # 50 | # The short X.Y version. 51 | version = '1.3' 52 | # The full version, including alpha/beta/rc tags. 53 | release = '1.3' 54 | 55 | # The language for content autogenerated by Sphinx. Refer to documentation 56 | # for a list of supported languages. 57 | #language = None 58 | 59 | # There are two options for replacing |today|: either, you set today to some 60 | # non-false value, then it is used: 61 | #today = '' 62 | # Else, today_fmt is used as the format for a strftime call. 63 | #today_fmt = '%B %d, %Y' 64 | 65 | # List of patterns, relative to source directory, that match files and 66 | # directories to ignore when looking for source files. 67 | exclude_patterns = ['_build'] 68 | 69 | # The reST default role (used for this markup: `text`) to use for all documents. 70 | #default_role = None 71 | 72 | # If true, '()' will be appended to :func: etc. cross-reference text. 73 | #add_function_parentheses = True 74 | 75 | # If true, the current module name will be prepended to all description 76 | # unit titles (such as .. function::). 77 | #add_module_names = True 78 | 79 | # If true, sectionauthor and moduleauthor directives will be shown in the 80 | # output. They are ignored by default. 
81 | #show_authors = False 82 | 83 | # The name of the Pygments (syntax highlighting) style to use. 84 | pygments_style = 'sphinx' 85 | 86 | # A list of ignored prefixes for module index sorting. 87 | #modindex_common_prefix = [] 88 | 89 | 90 | # -- Options for HTML output --------------------------------------------------- 91 | 92 | # The theme to use for HTML and HTML Help pages. See the documentation for 93 | # a list of builtin themes. 94 | html_theme = 'default' 95 | 96 | # Theme options are theme-specific and customize the look and feel of a theme 97 | # further. For a list of options available for each theme, see the 98 | # documentation. 99 | #html_theme_options = {} 100 | 101 | # Add any paths that contain custom themes here, relative to this directory. 102 | #html_theme_path = [] 103 | 104 | # The name for this set of Sphinx documents. If None, it defaults to 105 | # " v documentation". 106 | #html_title = None 107 | 108 | # A shorter title for the navigation bar. Default is the same as html_title. 109 | #html_short_title = None 110 | 111 | # The name of an image file (relative to this directory) to place at the top 112 | # of the sidebar. 113 | #html_logo = None 114 | 115 | # The name of an image file (within the static path) to use as favicon of the 116 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 117 | # pixels large. 118 | #html_favicon = None 119 | 120 | # Add any paths that contain custom static files (such as style sheets) here, 121 | # relative to this directory. They are copied after the builtin static files, 122 | # so a file named "default.css" will overwrite the builtin "default.css". 123 | html_static_path = ['_static'] 124 | 125 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 126 | # using the given strftime format. 127 | #html_last_updated_fmt = '%b %d, %Y' 128 | 129 | # If true, SmartyPants will be used to convert quotes and dashes to 130 | # typographically correct entities. 
131 | #html_use_smartypants = True 132 | 133 | # Custom sidebar templates, maps document names to template names. 134 | #html_sidebars = {} 135 | 136 | # Additional templates that should be rendered to pages, maps page names to 137 | # template names. 138 | #html_additional_pages = {} 139 | 140 | # If false, no module index is generated. 141 | #html_domain_indices = True 142 | 143 | # If false, no index is generated. 144 | #html_use_index = True 145 | 146 | # If true, the index is split into individual pages for each letter. 147 | #html_split_index = False 148 | 149 | # If true, links to the reST sources are added to the pages. 150 | #html_show_sourcelink = True 151 | 152 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 153 | #html_show_sphinx = True 154 | 155 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 156 | #html_show_copyright = True 157 | 158 | # If true, an OpenSearch description file will be output, and all pages will 159 | # contain a tag referring to it. The value of this option must be the 160 | # base URL from which the finished HTML is served. 161 | #html_use_opensearch = '' 162 | 163 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 164 | #html_file_suffix = None 165 | 166 | # Output file base name for HTML help builder. 167 | htmlhelp_basename = 'Parsleydoc' 168 | 169 | 170 | # -- Options for LaTeX output -------------------------------------------------- 171 | 172 | latex_elements = { 173 | # The paper size ('letterpaper' or 'a4paper'). 174 | #'papersize': 'letterpaper', 175 | 176 | # The font size ('10pt', '11pt' or '12pt'). 177 | #'pointsize': '10pt', 178 | 179 | # Additional stuff for the LaTeX preamble. 180 | #'preamble': '', 181 | } 182 | 183 | # Grouping the document tree into LaTeX files. List of tuples 184 | # (source start file, target name, title, author, documentclass [howto/manual]). 
185 | latex_documents = [ 186 | ('index', 'Parsley.tex', u'Parsley Documentation', 187 | u'Allen Short', 'manual'), 188 | ] 189 | 190 | # The name of an image file (relative to this directory) to place at the top of 191 | # the title page. 192 | #latex_logo = None 193 | 194 | # For "manual" documents, if this is true, then toplevel headings are parts, 195 | # not chapters. 196 | #latex_use_parts = False 197 | 198 | # If true, show page references after internal links. 199 | #latex_show_pagerefs = False 200 | 201 | # If true, show URL addresses after external links. 202 | #latex_show_urls = False 203 | 204 | # Documents to append as an appendix to all manuals. 205 | #latex_appendices = [] 206 | 207 | # If false, no module index is generated. 208 | #latex_domain_indices = True 209 | 210 | 211 | # -- Options for manual page output -------------------------------------------- 212 | 213 | # One entry per manual page. List of tuples 214 | # (source start file, name, description, authors, manual section). 215 | man_pages = [ 216 | ('index', 'parsley', u'Parsley Documentation', 217 | [u'Allen Short'], 1) 218 | ] 219 | 220 | # If true, show URL addresses after external links. 221 | #man_show_urls = False 222 | 223 | 224 | # -- Options for Texinfo output ------------------------------------------------ 225 | 226 | # Grouping the document tree into Texinfo files. List of tuples 227 | # (source start file, target name, title, author, 228 | # dir menu entry, description, category) 229 | texinfo_documents = [ 230 | ('index', 'Parsley', u'Parsley Documentation', 231 | u'Allen Short', 'Parsley', 'One line description of project.', 232 | 'Miscellaneous'), 233 | ] 234 | 235 | # Documents to append as an appendix to all manuals. 236 | #texinfo_appendices = [] 237 | 238 | # If false, no module index is generated. 239 | #texinfo_domain_indices = True 240 | 241 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 
242 | #texinfo_show_urls = 'footnote' 243 | -------------------------------------------------------------------------------- /doc/extending.rst: -------------------------------------------------------------------------------- 1 | ================================== 2 | Extending Grammars and Inheritance 3 | ================================== 4 | 5 | :warning: Unfinished 6 | 7 | Another feature taken from OMeta is *grammar inheritance*. We can 8 | write a grammar with rules that override ones in a parent. If we load 9 | the grammar from our calculator tutorial as ``Calc``, we can extend it 10 | with some constants:: 11 | 12 | from parsley import makeGrammar 13 | import math 14 | import calc 15 | calcGrammarEx = """ 16 | value = super | constant 17 | constant = 'pi' -> math.pi 18 | | 'e' -> math.e 19 | """ 20 | CalcEx = makeGrammar(calcGrammar, {"math": math}, extends=calc.Calc) 21 | 22 | 23 | Invoking the rule ``super`` calls the rule ``value`` in Calc. If it 24 | fails to match, our new ``value`` rule attempts to match a constant 25 | name. 26 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. Parsley documentation master file, created by 2 | sphinx-quickstart on Tue Aug 21 09:46:24 2012. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Parsley's documentation! 7 | =================================== 8 | 9 | Contents: 10 | 11 | .. 
toctree:: 12 | :maxdepth: 2 13 | 14 | tutorial 15 | tutorial2 16 | tutorial3 17 | extending 18 | terml 19 | reference 20 | community 21 | -------------------------------------------------------------------------------- /doc/reference.rst: -------------------------------------------------------------------------------- 1 | Parsley Reference 2 | ----------------- 3 | 4 | Basic syntax 5 | ~~~~~~~~~~~~ 6 | ``foo = ....``: 7 | Define a rule named foo. 8 | 9 | ``expr1 expr2``: 10 | Match expr1, and then match expr2 if it succeeds, returning the value of 11 | expr2. Like Python's ``and``. 12 | 13 | ``expr1 | expr2``: 14 | Try to match ``expr1`` --- if it fails, match ``expr2`` instead. Like Python's 15 | ``or``. 16 | 17 | ``expr*``: 18 | Match ``expr`` zero or more times, returning a list of matches. 19 | 20 | ``expr+``: 21 | Match ``expr`` one or more times, returning a list of matches. 22 | 23 | ``expr?``: 24 | Try to match ``expr``. Returns ``None`` if it fails to match. 25 | 26 | ``expr{n, m}``: 27 | Match ``expr`` at least ``n`` times, and no more than ``m`` times. 28 | 29 | ``expr{n}``: 30 | Match ``expr`` ``n`` times exactly. 31 | 32 | ``~expr``: 33 | Negative lookahead. Fails if the next item in the input matches 34 | ``expr``. Consumes no input. 35 | 36 | ``~~expr``: 37 | Positive lookahead. Fails if the next item in the input does *not* 38 | match ``expr``. Consumes no input. 39 | 40 | ``ruleName`` or ``ruleName(arg1 arg2 etc)``: 41 | Call the rule ``ruleName``, possibly with args. 42 | 43 | ``'x'``: 44 | Match the literal character 'x'. 45 | 46 | ````: 47 | Returns the string consumed by matching ``expr``. Good for tokenizing rules. 48 | 49 | ``expr:name``: 50 | Bind the result of expr to the local variable ``name``. 51 | 52 | ``-> pythonExpression``: 53 | Evaluate the given Python expression and return its result. Can be 54 | used inside parentheses too! 55 | 56 | ``!(pythonExpression)``: 57 | Invoke a Python expression as an action. 
58 | 59 | ``?(pythonExpression)``: 60 | Fail if the Python expression is false, Returns True otherwise. 61 | 62 | ``expr ^(CustomLabel)``: 63 | If the expr fails, the exception raised will contain CustomLabel. 64 | Good for providing more context when a rule is broken. 65 | CustomLabel can contain any character other than "(" and ")". 66 | 67 | Comments like Python comments are supported as well, starting with # 68 | and extending to the end of the line. 69 | 70 | 71 | Python API 72 | ~~~~~~~~~~ 73 | .. automodule:: parsley 74 | :members: 75 | 76 | 77 | Protocol parsing API 78 | ==================== 79 | 80 | .. py:module:: ometa.protocol 81 | 82 | .. py:class:: ParserProtocol 83 | 84 | The Twisted ``Protocol`` subclass used for :ref:`parsing stream protocols 85 | using Parsley `. It has two public attributes: 86 | 87 | .. py:attribute:: sender 88 | 89 | After the connection is established, this attribute will refer to the 90 | sender created by the sender factory of the :class:`ParserProtocol`. 91 | 92 | .. py:attribute:: receiver 93 | 94 | After the connection is established, this attribute will refer to the 95 | receiver created by the receiver factory of the :class:`ParserProtocol`. 96 | 97 | It's common to also add a ``factory`` attribute to the 98 | :class:`ParserProtocol` from its factory's ``buildProtocol`` method, but 99 | this isn't strictly required or guaranteed to be present. 100 | 101 | Subclassing or instantiating :class:`ParserProtocol` is not necessary; 102 | :func:`~parsley.makeProtocol` is sufficient and requires less boilerplate. 103 | 104 | .. _receivers: 105 | 106 | .. py:class:: Receiver 107 | 108 | :class:`Receiver` is not a real class but is used here for demonstration 109 | purposes to indicate the required API. 110 | 111 | .. py:attribute:: currentRule 112 | 113 | :class:`ParserProtocol` examines the :attr:`currentRule` attribute at the 114 | beginning of parsing as well as after every time a rule has completely 115 | matched. 
At these times, the rule with the same name as the value of 116 | :attr:`currentRule` will be selected to start parsing the incoming stream 117 | of data. 118 | 119 | .. py:method:: prepareParsing(parserProtocol) 120 | 121 | :meth:`prepareParsing` is called after the :class:`.ParserProtocol` has 122 | established a connection, and is passed the :class:`.ParserProtocol` 123 | instance itself. 124 | 125 | :param parserProtocol: An instance of :class:`.ProtocolParser`. 126 | 127 | .. py:method:: finishParsing(reason) 128 | 129 | :meth:`finishParsing` is called if an exception was raised during 130 | parsing, or when the :class:`.ParserProtocol` has lost its connection, 131 | whichever comes first. It will only be called once. 132 | 133 | An exception raised during parsing can be due to incoming data that 134 | doesn't match the current rule or an exception raised calling python code 135 | during matching. 136 | 137 | :param reason: A `Failure`_ encapsulating the reason parsing has ended. 138 | 139 | .. _senders: 140 | 141 | Senders do not have any required API as :class:`ParserProtocol` will never call 142 | methods on a sender. 143 | 144 | 145 | Built-in Parsley Rules 146 | ~~~~~~~~~~~~~~~~~~~~~~ 147 | 148 | ``anything``: 149 | Matches a single character from the input. 150 | 151 | ``letter``: 152 | Matches a single ASCII letter. 153 | 154 | ``digit``: 155 | Matches a decimal digit. 156 | 157 | ``letterOrDigit``: 158 | Combines the above. 159 | 160 | ``end``: 161 | Matches the end of input. 162 | 163 | ``ws``: 164 | Matches zero or more spaces, tabs, or newlines. 165 | 166 | ``exactly(char)``: 167 | Matches the character `char`. 168 | 169 | 170 | .. 
_Failure: http://twistedmatrix.com/documents/current/api/twisted.python.failure.Failure.html 171 | -------------------------------------------------------------------------------- /doc/terml.rst: -------------------------------------------------------------------------------- 1 | ===== 2 | TermL 3 | ===== 4 | 5 | TermL ("term-ell") is the Term Language, a small expression-based language for 6 | representing arbitrary data in a simple structured format. It is ideal for 7 | expressing abstract syntax trees (ASTs) and other kinds of primitive data 8 | trees. 9 | 10 | Creating Terms 11 | ============== 12 | 13 | :: 14 | 15 | >>> from terml.nodes import termMaker as t 16 | >>> t.Term() 17 | term('Term') 18 | 19 | That's it! We've created an empty term, `Term`, with nothing inside. 20 | 21 | :: 22 | 23 | >>> t.Num(1) 24 | term('Num(1)') 25 | >>> t.Outer(t.Inner()) 26 | term('Outer(Inner)') 27 | 28 | 29 | We can see that terms are not just `namedtuple` lookalikes. They have their 30 | own internals and store data in a slightly different and more structured way 31 | than a normal tuple. 32 | 33 | Parsing Terms 34 | ============= 35 | 36 | Parsley can parse terms from streams. Terms can contain any kind of parseable 37 | data, including other terms. Returning to the ubiquitous calculator example:: 38 | 39 | add = Add(:x, :y) -> x + y 40 | 41 | Here this rule matches a term called `Add` which has two components, bind 42 | those components to a couple of names (`x` and `y`), and return their sum. If 43 | this rule were applied to a term like `Add(3, 5)`, it would return 8. 44 | 45 | Terms can be nested, too. 
Here's an example that performs a slightly contrived 46 | match on a negated term inside an addition:: 47 | 48 | add_negate = Add(:x, Negate(:y)) -> x - y 49 | -------------------------------------------------------------------------------- /doc/test_calc.py: -------------------------------------------------------------------------------- 1 | from calc import Calc 2 | import unittest 3 | 4 | class CalcTest(unittest.TestCase): 5 | 6 | def test_calc(self): 7 | self.assertEqual(Calc("2 * (3 + 4 * 5)").expr(), 46) 8 | self.assertEqual(Calc("2 *( 3 + 40 / 5)").expr(), 22) 9 | self.assertEqual(Calc("2 + (4 * 3 + 40 / 5)").expr(), 22) 10 | -------------------------------------------------------------------------------- /doc/tutorial.rst: -------------------------------------------------------------------------------- 1 | ========================================== 2 | Parsley Tutorial Part I: Basics and Syntax 3 | ========================================== 4 | 5 | ************************************* 6 | From Regular Expressions To Grammars 7 | ************************************* 8 | 9 | Parsley is a pattern matching and parsing tool for Python programmers. 10 | 11 | Most Python programmers are familiar with regular expressions, as 12 | provided by Python's `re` module. To use it, you provide a string that 13 | describes the pattern you want to match, and your input. 
14 | 15 | For example:: 16 | 17 | >>> import re 18 | >>> x = re.compile("a(b|c)d+e") 19 | >>> x.match("abddde") 20 | <_sre.SRE_Match object at 0x7f587af54af8> 21 | 22 | 23 | You can do exactly the same sort of thing in Parsley:: 24 | 25 | >>> import parsley 26 | >>> x = parsley.makeGrammar("foo = 'a' ('b' | 'c') 'd'+ 'e'", {}) 27 | >>> x("abdde").foo() 28 | 'e' 29 | 30 | From this small example, a couple differences between regular 31 | expressions and Parsley grammars can be seen: 32 | 33 | Parsley Grammars Have Named Rules 34 | --------------------------------- 35 | 36 | A Parsley grammar can have many rules, and each has a name. The 37 | example above has a single rule named `foo`. Rules can call each 38 | other; calling rules in Parsley works like calling functions in 39 | Python. Here is another way to write the grammar above:: 40 | 41 | foo = 'a' baz 'd'+ 'e' 42 | baz = 'b' | 'c' 43 | 44 | 45 | Parsley Grammars Are Expressions 46 | -------------------------------- 47 | 48 | Calling `match` for a regular expression returns a match object if the 49 | match succeeds or None if it fails. Parsley parsers return the value 50 | of last expression in the rule. Behind the scenes, Parsley turns each 51 | rule in your grammar into Python methods. In pseudo-Python code, it 52 | looks something like this:: 53 | 54 | def foo(self): 55 | match('a') 56 | self.baz() 57 | match_one_or_more('d') 58 | return match('e') 59 | 60 | def baz(self): 61 | return match('b') or match('c') 62 | 63 | The value of the last expression in the rule is what the rule 64 | returns. This is why our example returns 'e'. 65 | 66 | The similarities to regular expressions pretty much end here, 67 | though. Having multiple named rules composed of expressions makes for 68 | a much more powerful tool, and now we're going to look at some more 69 | features that go even further. 
70 | 71 | Rules Can Embed Python Expressions 72 | ---------------------------------- 73 | 74 | Since these rules just turn into Python code eventually, we can stick 75 | some Python code into them ourselves. This is particularly useful for 76 | changing the return value of a rule. The Parsley expression for this 77 | is `->`. We can also bind the results of expressions to variable names 78 | and use them in Python code. So things like this are possible:: 79 | 80 | x = parsley.makeGrammar(""" 81 | foo = 'a':one baz:two 'd'+ 'e' -> (one, two) 82 | baz = 'b' | 'c' 83 | """, {}) 84 | print x("abdde").foo() 85 | 86 | :: 87 | 88 | ('a', 'b') 89 | 90 | Literal match expressions like `'a'` return the character they 91 | match. Using a colon and a variable name after an expression is like 92 | assignment in Python. As a result, we can use those names in a Python 93 | expression - in this case, creating a tuple. 94 | 95 | Another way to use Python code in a rule is to write custom tests for 96 | matching. Sometimes it's more convenient to write some Python that 97 | determines if a rule matches than to stick to Parsley expressions 98 | alone. For those cases, we can use `?()`. Here, we use the builtin 99 | rule `anything` to match a single character, then a Python predicate 100 | to decide if it's the one we want:: 101 | 102 | digit = anything:x ?(x in '0123456789') -> x 103 | 104 | This rule `digit` will match any decimal digit. We need the `-> x` on 105 | the end to return the character rather than the value of the predicate 106 | expression, which is just `True`. 107 | 108 | Repeated Matches Make Lists 109 | --------------------------- 110 | 111 | Like regular expressions, Parsley supports repeating matches. You can 112 | match an expression zero or more times with '* ', one or more times 113 | with '+', and a specific number of times with '{n, m}' or just 114 | '{n}'. 
Since all expressions in Parsley return a value, these 115 | repetition operators return a list containing each match they made. 116 | 117 | :: 118 | 119 | x = parsley.makeGrammar(""" 120 | digit = anything:x ?(x in '0123456789') -> x 121 | number = digit+ 122 | """, {}) 123 | print x("314159").number() 124 | 125 | :: 126 | 127 | ['3', '1', '4', '1', '5', '9'] 128 | 129 | The `number` rule repeatedly matches `digit` and collects the matches 130 | into a list. This gets us part way to turning a string like `314159` 131 | into an integer. All we need now is to turn the list back into a 132 | string and call `int()`:: 133 | 134 | x = parsley.makeGrammar(""" 135 | digit = anything:x ?(x in '0123456789') -> x 136 | number = digit+:ds -> int(''.join(ds)) 137 | """, {}) 138 | print x("8675309").number() 139 | 140 | :: 141 | 142 | 8675309 143 | 144 | Collecting Chunks Of Input 145 | -------------------------- 146 | 147 | If it seemed kind of strange to break our input string up into a list 148 | and then reassemble it into a string using `join`, you're not 149 | alone. Parsley has a shortcut for this since it's a common case: you 150 | can use `<>` around a rule to make it return the slice of input it 151 | consumes, ignoring the actual return value of the rule. For example:: 152 | 153 | x = parsley.makeGrammar(""" 154 | digit = anything:x ?(x in '0123456789') 155 | number = :ds -> int(ds) 156 | """, {}) 157 | print x("11235").number() 158 | 159 | :: 160 | 161 | 11235 162 | 163 | Here, `` returns the string `"11235"`, since that's the 164 | portion of the input that `digit+` matched. (In this case it's the 165 | entire input, but we'll see some more complex cases soon.) Since it 166 | ignores the list returned by `digit+`, leaving the `-> x` out of 167 | `digit` doesn't change the result. 168 | 169 | ********************** 170 | Building A Calculator 171 | ********************** 172 | 173 | Now let's look at using these rules in a more complicated parser. 
We 174 | have support for parsing numbers; let's do addition, as well. 175 | :: 176 | 177 | x = parsley.makeGrammar(""" 178 | digit = anything:x ?(x in '0123456789') 179 | number = :ds -> int(ds) 180 | expr = number:left ( '+' number:right -> left + right 181 | | -> left) 182 | """, {}) 183 | print x("17+34").expr() 184 | print x("18").expr() 185 | 186 | :: 187 | 188 | 51 189 | 18 190 | 191 | Parentheses group expressions just like in Python. the '`|`' operator 192 | is like `or` in Python - it short-circuits. It tries each expression 193 | until it finds one that matches. For `"17+34"`, the `number` rule 194 | matches "17", then Parsley tries to match `+` followed by another 195 | `number`. Since "+" and "34" are the next things in the input, those 196 | match, and it then runs the Python expression `left + right` and 197 | returns its value. For the input `"18"` it does the same, but `+` does 198 | not match, so Parsley tries the next thing after `|`. Since this is 199 | just a Python expression, the match succeeds and the number 18 is 200 | returned. 201 | 202 | Now let's add subtraction:: 203 | 204 | digit = anything:x ?(x in '0123456789') 205 | number = :ds -> int(ds) 206 | expr = number:left ( '+' number:right -> left + right 207 | | '-' number:right -> left - right 208 | | -> left) 209 | 210 | This will accept things like '5-4' now. 211 | 212 | Since parsing numbers is so common and useful, Parsley actually has 213 | 'digit' as a builtin rule, so we don't even need to define it 214 | ourselves. We'll leave it out in further examples and rely on the 215 | version Parsley provides. 216 | 217 | Normally we like to allow whitespace in our expressions, so let's add 218 | some support for spaces:: 219 | 220 | number = :ds -> int(ds) 221 | ws = ' '* 222 | expr = number:left ws ('+' ws number:right -> left + right 223 | |'-' ws number:right -> left - right 224 | | -> left) 225 | 226 | Now we can handle "17 +34", "2 - 1", etc. 
227 | 228 | We could go ahead and add multiplication and division here (and 229 | hopefully it's obvious how that would work), but let's complicate 230 | things further and allow multiple operations in our expressions -- 231 | things like "1 - 2 + 3". 232 | 233 | There's a couple different ways to do this. Possibly the easiest is to 234 | build a list of numbers and operations, then do the math.:: 235 | 236 | x = parsley.makeGrammar(""" 237 | number = :ds -> int(ds) 238 | ws = ' '* 239 | add = '+' ws number:n -> ('+', n) 240 | sub = '-' ws number:n -> ('-', n) 241 | addsub = ws (add | sub) 242 | expr = number:left (addsub+:right -> right 243 | | -> left) 244 | """, {}) 245 | print x("1 + 2 - 3").expr() 246 | 247 | :: 248 | 249 | [('+', 2), ('-, 3)] 250 | 251 | Oops, this is only half the job done. We're collecting the operators 252 | and values, but now we need to do the actual calculation. The easiest 253 | way to do it is probably to write a Python function and call it from 254 | inside the grammar. 255 | 256 | So far we have been passing an empty dict as the second argument to 257 | ``makeGrammar``. This is a dict of variable bindings that can be used 258 | in Python expressions in the grammar. 
So we can pass Python objects, 259 | such as functions, this way:: 260 | 261 | def calculate(start, pairs): 262 | result = start 263 | for op, value in pairs: 264 | if op == '+': 265 | result += value 266 | elif op == '-': 267 | result -= value 268 | return result 269 | x = parsley.makeGrammar(""" 270 | number = :ds -> int(ds) 271 | ws = ' '* 272 | add = '+' ws number:n -> ('+', n) 273 | sub = '-' ws number:n -> ('-', n) 274 | addsub = ws (add | sub) 275 | expr = number:left (addsub+:right -> calculate(left, right) 276 | | -> left) 277 | """, {"calculate": calculate}) 278 | print x("4 + 5 - 6").expr() 279 | 280 | :: 281 | 282 | 3 283 | 284 | 285 | Introducing this function lets us simplify even further: instead of 286 | using ``addsub+``, we can use ``addsub*``, since ``calculate(left, [])`` 287 | will return ``left`` -- so now ``expr`` becomes:: 288 | 289 | expr = number:left addsub*:right -> calculate(left, right) 290 | 291 | 292 | So now let's look at adding multiplication and division. Here, we run 293 | into precedence rules: should "4 * 5 + 6" give us 26, or 44? The 294 | traditional choice is for multiplication and division to take 295 | precedence over addition and subtraction, so the answer should 296 | be 26. 
We'll resolve this by making sure multiplication and division 297 | happen before addition and subtraction are considered:: 298 | 299 | def calculate(start, pairs): 300 | result = start 301 | for op, value in pairs: 302 | if op == '+': 303 | result += value 304 | elif op == '-': 305 | result -= value 306 | elif op == '*': 307 | result *= value 308 | elif op == '/': 309 | result /= value 310 | return result 311 | x = parsley.makeGrammar(""" 312 | number = :ds -> int(ds) 313 | ws = ' '* 314 | add = '+' ws expr2:n -> ('+', n) 315 | sub = '-' ws expr2:n -> ('-', n) 316 | mul = '*' ws number:n -> ('*', n) 317 | div = '/' ws number:n -> ('/', n) 318 | 319 | addsub = ws (add | sub) 320 | muldiv = ws (mul | div) 321 | 322 | expr = expr2:left addsub*:right -> calculate(left, right) 323 | expr2 = number:left muldiv*:right -> calculate(left, right) 324 | """, {"calculate": calculate}) 325 | print x("4 * 5 + 6").expr() 326 | 327 | :: 328 | 329 | 26 330 | 331 | Notice particularly that ``add``, ``sub``, and ``expr`` all call the 332 | ``expr2`` rule now where they called ``number`` before. This means 333 | that all the places where a number was expected previously, a 334 | multiplication or division expression can appear instead. 335 | 336 | 337 | Finally let's add parentheses, so you can override the precedence and 338 | write "4 * (5 + 6)" when you do want 44. We'll do this by adding a 339 | ``value`` rule that accepts either a number or an expression in 340 | parentheses, and replace existing calls to ``number`` with calls to 341 | ``value``. 
342 | 343 | :: 344 | 345 | def calculate(start, pairs): 346 | result = start 347 | for op, value in pairs: 348 | if op == '+': 349 | result += value 350 | elif op == '-': 351 | result -= value 352 | elif op == '*': 353 | result *= value 354 | elif op == '/': 355 | result /= value 356 | return result 357 | x = parsley.makeGrammar(""" 358 | number = :ds -> int(ds) 359 | parens = '(' ws expr:e ws ')' -> e 360 | value = number | parens 361 | ws = ' '* 362 | add = '+' ws expr2:n -> ('+', n) 363 | sub = '-' ws expr2:n -> ('-', n) 364 | mul = '*' ws value:n -> ('*', n) 365 | div = '/' ws value:n -> ('/', n) 366 | 367 | addsub = ws (add | sub) 368 | muldiv = ws (mul | div) 369 | 370 | expr = expr2:left addsub*:right -> calculate(left, right) 371 | expr2 = value:left muldiv*:right -> calculate(left, right) 372 | """, {"calculate": calculate}) 373 | 374 | print x("4 * (5 + 6) + 1").expr() 375 | 376 | :: 377 | 378 | 45 379 | 380 | And there you have it: a four-function calculator with precedence and 381 | parentheses. 382 | -------------------------------------------------------------------------------- /doc/tutorial2.rst: -------------------------------------------------------------------------------- 1 | ================================================= 2 | Parsley Tutorial Part II: Parsing Structured Data 3 | ================================================= 4 | 5 | Now that you are familiar with the basics of Parsley syntax, let's 6 | look at a more realistic example: a JSON parser. 7 | 8 | The JSON spec on http://json.org/ describes the format, and we can 9 | adapt its description to a parser. We'll write the Parsley rules in 10 | the same order as the grammar rules in the right sidebar on the JSON 11 | site, starting with the top-level rule, 'object'. 12 | :: 13 | 14 | object = ws '{' members:m ws '}' -> dict(m) 15 | 16 | Parsley defines a builtin rule ``ws`` which consumes any spaces, tabs, 17 | or newlines it can. 
18 | 19 | Since JSON objects are represented in Python as dicts, and ``dict`` 20 | takes a list of pairs, we need a rule to collect name/value pairs 21 | inside an object expression. 22 | :: 23 | 24 | members = (pair:first (ws ',' pair)*:rest -> [first] + rest) 25 | | -> [] 26 | 27 | This handles the three cases for object contents: one, multiple, or 28 | zero pairs. A name/value pair is separated by a colon. We use the 29 | builtin rule ``spaces`` to consume any whitespace after the colon:: 30 | 31 | pair = ws string:k ws ':' value:v -> (k, v) 32 | 33 | Arrays, similarly, are sequences of array elements, and are 34 | represented as Python lists. 35 | :: 36 | 37 | array = '[' elements:xs ws ']' -> xs 38 | elements = (value:first (ws ',' value)*:rest -> [first] + rest) | -> [] 39 | 40 | Values can be any JSON expression. 41 | :: 42 | 43 | value = ws (string | number | object | array 44 | | 'true' -> True 45 | | 'false' -> False 46 | | 'null' -> None) 47 | 48 | 49 | Strings are sequences of zero or more characters between double 50 | quotes. Of course, we need to deal with escaped characters as 51 | well. This rule introduces the operator ``~``, which does negative 52 | lookahead; if the expression following it succeeds, its parse will 53 | fail. If the expression fails, the rest of the parse continues. Either 54 | way, no input will be consumed. 55 | :: 56 | 57 | string = '"' (escapedChar | ~'"' anything)*:c '"' -> ''.join(c) 58 | 59 | This is a common pattern, so let's examine it step by step. This will 60 | match leading whitespace and then a double quote character. It then 61 | matches zero or more characters. If it's not an ``escapedChar`` (which 62 | will start with a backslash), we check to see if it's a double quote, 63 | in which case we want to end the loop. If it's not a double quote, we 64 | match it using the rule ``anything``, which accepts a single character 65 | of any kind, and continue. 
Finally, we match the ending double quote 66 | and return the characters in the string. We cannot use the ``<>`` 67 | syntax in this case because we don't want a literal slice of the input 68 | -- we want escape sequences to be replaced with the character they 69 | represent. 70 | 71 | It's very common to use ``~`` for "match until" situations where you 72 | want to keep parsing only until an end marker is found. Similarly, 73 | ``~~`` is positive lookahead: it succeed if its expression succeeds 74 | but not consume any input. 75 | 76 | The ``escapedChar`` rule should not be too surprising: we match a 77 | backslash then whatever escape code is given. 78 | 79 | :: 80 | 81 | escapedChar = '\\' (('"' -> '"') |('\\' -> '\\') 82 | |('/' -> '/') |('b' -> '\b') 83 | |('f' -> '\f') |('n' -> '\n') 84 | |('r' -> '\r') |('t' -> '\t') 85 | |('\'' -> '\'') | escapedUnicode) 86 | 87 | Unicode escapes (of the form ``\u2603``) require matching four hex 88 | digits, so we use the repetition operator ``{}``, which works like + 89 | or * except taking either a ``{min, max}`` pair or simply a 90 | ``{number}`` indicating the exact number of repetitions. 91 | :: 92 | 93 | hexdigit = :x ?(x in '0123456789abcdefABCDEF') -> x 94 | escapedUnicode = 'u' :hs -> unichr(int(hs, 16)) 95 | 96 | With strings out of the way, we advance to numbers, both integer and 97 | floating-point. 98 | 99 | :: 100 | 101 | number = spaces ('-' | -> ''):sign (intPart:ds (floatPart(sign ds) 102 | | -> int(sign + ds))) 103 | 104 | Here we vary from the json.org description a little and move sign 105 | handling up into the ``number`` rule. We match either an ``intPart`` 106 | followed by a ``floatPart`` or just an ``intPart`` by itself. 107 | :: 108 | 109 | digit = :x ?(x in '0123456789') -> x 110 | digits = 111 | digit1_9 = :x ?(x in '123456789') -> x 112 | 113 | intPart = (digit1_9:first digits:rest -> first + rest) | digit 114 | floatPart :sign :ds = <('.' digits exponent?) 
| exponent>:tail 115 | -> float(sign + ds + tail) 116 | exponent = ('e' | 'E') ('+' | '-')? digits 117 | 118 | In JSON, multi-digit numbers cannot start with 0 (since that is 119 | Javascript's syntax for octal numbers), so ``intPart`` uses ``digit1_9`` 120 | to exclude it in the first position. 121 | 122 | The ``floatPart`` rule takes two parameters, ``sign`` and ``ds``. Our 123 | ``number`` rule passes values for these when it invokes ``floatPart``, 124 | letting us avoid duplication of work within the rule. Note that 125 | pattern matching on arguments to rules works the same as on the string 126 | input to the parser. In this case, we provide no pattern, just a name: 127 | ``:ds`` is the same as ``anything:ds``. 128 | 129 | (Also note that our float rule cheats a little: it does not really 130 | parse floating-point numbers, it merely recognizes them and passes 131 | them to Python's ``float`` builtin to actually produce the value.) 132 | 133 | The full version of this parser and its test cases can be found in the 134 | ``examples`` directory in the Parsley distribution. 135 | -------------------------------------------------------------------------------- /doc/tutorial3.rst: -------------------------------------------------------------------------------- 1 | .. _protocol-parsing: 2 | 3 | =============================================== 4 | Parsley Tutorial Part III: Parsing Network Data 5 | =============================================== 6 | 7 | This tutorial assumes basic knowledge of writing `Twisted`_ `TCP clients`_ or 8 | `servers`_. 9 | 10 | 11 | Basic parsing 12 | ------------- 13 | 14 | Parsing data that comes in over the network can be difficult due to that there 15 | is no guarantee of receiving whole messages. Buffering is often complicated by 16 | protocols switching between using fixed-width messages and delimiters for 17 | framing. Fortunately, Parsley can remove all of this tedium. 
18 | 19 | With :func:`parsley.makeProtocol`, Parsley can generate a `Twisted`_ 20 | `IProtocol`_-implementing class which will match incoming network data using 21 | Parsley grammar rules. Before getting started with :func:`.makeProtocol`, let's 22 | build a grammar for `netstrings`_. The netstrings protocol is very simple:: 23 | 24 | 4:spam,4:eggs, 25 | 26 | This stream contains two netstrings: ``spam``, and ``eggs``. The data is 27 | prefixed with one or more ASCII digits followed by a ``:``, and suffixed with a 28 | ``,``. So, a Parsley grammar to match a netstring would look like: 29 | 30 | .. literalinclude:: _static/listings/tutorial3-netstrings.py 31 | :start-after: grammar = 32 | :end-before: receiveNetstring 33 | 34 | :func:`.makeProtocol` takes, in addition to a grammar, a factory for a "sender" 35 | and a factory for a "receiver". In the system of objects managed by the 36 | :class:`.ParserProtocol`, the sender is in charge of writing data to the wire, 37 | and the receiver has methods called on it by the Parsley rules. To demonstrate 38 | it, here is the final piece needed in the Parsley grammar for netstrings: 39 | 40 | .. literalinclude:: _static/listings/tutorial3-netstrings.py 41 | :start-after: netstring = 42 | :end-before: """ 43 | 44 | The receiver is always available in Parsley rules with the name ``receiver``, 45 | allowing Parsley rules to call methods on it. 46 | 47 | When data is received over the wire, the :class:`.ParserProtocol` tries to 48 | match the received data against the current rule. If the current rule requires 49 | more data to finish matching, the :class:`.ParserProtocol` stops and waits 50 | until more data comes in, then tries to continue matching. This repeats until 51 | the current rule is completely matched, and then the :class:`.ParserProtocol` 52 | starts matching any leftover data against the current rule again. 
53 | 54 | One specifies the current rule by setting a :attr:`.currentRule` attribute on 55 | the receiver, which the :class:`.ParserProtocol` looks at before doing any 56 | parsing. Changing the current rule is addressed in the :ref:`Switching rules 57 | ` section. 58 | 59 | Since the :class:`.ParserProtocol` will never modify the :attr:`.currentRule` 60 | attribute itself, the default behavior is to keep using the same rule. Parsing 61 | netstrings doesn't require any rule changing, so, the default behavior of 62 | continuing to use the same rule is fine. 63 | 64 | Both the sender factory and receiver factory are constructed when the 65 | :class:`.ParserProtocol`'s connection is established. The sender factory is a 66 | one-argument callable which will be passed the :class:`.ParserProtocol`'s 67 | `Transport`_. This allows the sender to send data over the transport. For 68 | example: 69 | 70 | .. literalinclude:: _static/listings/tutorial3-netstrings.py 71 | :pyobject: NetstringSender 72 | 73 | The receiver factory is another one-argument callable which is passed the 74 | constructed sender. The returned object must at least have 75 | :meth:`.prepareParsing` and :meth:`.finishParsing` methods. 76 | :meth:`.prepareParsing` is called with the :class:`.ParserProtocol` instance 77 | when a connection is established (i.e. in the ``connectionMade`` of the 78 | :class:`.ParserProtocol`) and :meth:`.finishParsing` is called when a 79 | connection is closed (i.e. in the ``connectionLost`` of the 80 | :class:`.ParserProtocol`). 81 | 82 | .. note:: 83 | Both the receiver factory and its returned object's :meth:`.prepareParsing` 84 | are called at in the :class:`.ParserProtocol`'s ``connectionMade`` method; 85 | this separation is for ease of testing receivers. 86 | 87 | To demonstrate a receiver, here is a simple receiver that receives netstrings 88 | and echos the same netstrings back: 89 | 90 | .. 
literalinclude:: _static/listings/tutorial3-netstrings.py 91 | :pyobject: NetstringReceiver 92 | 93 | Putting it all together, the Protocol is constructed using the grammar, sender 94 | factory, and receiver factory: 95 | 96 | .. literalinclude:: _static/listings/tutorial3-netstrings.py 97 | :start-after: self.sender.sendNetstring 98 | :end-before: class 99 | 100 | :download:`The complete script is also available for download. 101 | <_static/listings/tutorial3-netstrings.py>` 102 | 103 | 104 | Intermezzo: error reporting 105 | --------------------------- 106 | 107 | If an exception is raised from within Parsley during parsing, whether it's due 108 | to input not matching the current rule or an exception being raised from code 109 | the grammar calls, the connection will be immediately closed. The traceback 110 | will be captured as a `Failure`_ and passed to the :meth:`.finishParsing` 111 | method of the receiver. 112 | 113 | At present, there is no way to recover from failure. 114 | 115 | 116 | Composing senders and receivers 117 | ------------------------------- 118 | 119 | The design of senders and receivers is intentional to make composition easy: no 120 | subclassing is required. While the composition is easy enough to do on your 121 | own, Parsley provides a function: :func:`.stack`. It takes a base factory 122 | followed by zero or more wrappers. 123 | 124 | Its use is extremely simple: ``stack(x, y, z)`` will return a callable suitable 125 | either as a sender or receiver factory which will, when called with an 126 | argument, return ``x(y(z(argument)))``. 127 | 128 | An example of wrapping a sender factory: 129 | 130 | .. 
literalinclude:: _static/listings/tutorial3-netstring-reversal.py 131 | :pyobject: NetstringReversalWrapper 132 | 133 | And then, constructing the Protocol:: 134 | 135 | NetstringProtocol = makeProtocol( 136 | grammar, 137 | stack(NetstringReversalWrapper, NetstringSender), 138 | NetstringReceiver) 139 | 140 | A wrapper doesn't need to call the same methods on the thing it's wrapping. 141 | Also note that in most cases, it's important to forward unknown methods on to 142 | the wrapped object. An example of wrapping a receiver: 143 | 144 | .. literalinclude:: _static/listings/tutorial3-netstring-reversal.py 145 | :pyobject: NetstringSplittingWrapper 146 | 147 | The corresponding receiver and again, constructing the Protocol: 148 | 149 | .. literalinclude:: _static/listings/tutorial3-netstring-reversal.py 150 | :pyobject: SplitNetstringReceiver 151 | 152 | .. literalinclude:: _static/listings/tutorial3-netstring-reversal.py 153 | :start-after: begin protocol definition 154 | :end-before: SplitNetstringReceiver 155 | 156 | :download:`The complete script is also available for download. 157 | <_static/listings/tutorial3-netstring-reversal.py>` 158 | 159 | 160 | .. _switching-rules: 161 | 162 | Switching rules 163 | --------------- 164 | 165 | As mentioned before, it's possible to change the current rule. Imagine a 166 | "netstrings2" protocol that looks like this:: 167 | 168 | 3:foo,3;bar,4:spam,4;eggs, 169 | 170 | That is, the protocol alternates between using ``:`` and using ``;`` delimiting 171 | data length and the data. The amended grammar would look something like this: 172 | 173 | .. literalinclude:: _static/listings/tutorial3-netstrings2.py 174 | :start-after: grammar = 175 | :end-before: """ 176 | 177 | Changing the current rule is as simple as changing the :attr:`.currentRule` 178 | attribute on the receiver. So, the ``netstringReceived`` method could look like 179 | this: 180 | 181 | .. 
literalinclude:: _static/listings/tutorial3-netstrings2.py 182 | :pyobject: NetstringReceiver.netstringReceived 183 | 184 | While changing the :attr:`.currentRule` attribute can be done at any time, the 185 | :class:`.ParserProtocol` only examines the :attr:`.currentRule` at the 186 | beginning of parsing and after a rule has finished matching. As a result, if 187 | the :attr:`.currentRule` changes, the :class:`.ParserProtocol` will wait until 188 | the current rule is completely matched before switching rules. 189 | 190 | :download:`The complete script is also available for download. 191 | <_static/listings/tutorial3-netstrings2.py>` 192 | 193 | 194 | .. _Twisted: http://twistedmatrix.com/trac/ 195 | .. _TCP clients: http://twistedmatrix.com/documents/current/core/howto/clients.html 196 | .. _servers: http://twistedmatrix.com/documents/current/core/howto/servers.html 197 | .. _IProtocol: http://twistedmatrix.com/documents/current/api/twisted.internet.interfaces.IProtocol.html 198 | .. _netstrings: http://cr.yp.to/proto/netstrings.txt 199 | .. _Transport: http://twistedmatrix.com/documents/current/api/twisted.internet.interfaces.ITransport.html 200 | .. 
_Failure: http://twistedmatrix.com/documents/current/api/twisted.python.failure.Failure.html 201 | -------------------------------------------------------------------------------- /examples/337141-steamcube.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pk": 337141, 4 | "model": "addons.addon", 5 | "fields": { 6 | "dev_agreement": false, 7 | "eula": null, 8 | "last_updated": "2011-10-18 16:28:24", 9 | "view_source": true, 10 | "enable_thankyou": false, 11 | "total_downloads": 0, 12 | "premium_type": 0, 13 | "app_slug": "something-something", 14 | "developer_comments": null, 15 | "_current_version": 1268829, 16 | "average_daily_downloads": 0, 17 | "_backup_version": null, 18 | "manifest_url": "http://micropipes.com/temp/steamcube.webapp", 19 | "app_domain": "micropipes.com", 20 | "admin_review_type": 1, 21 | "the_future": null, 22 | "trusted": false, 23 | "total_contributions": null, 24 | "locale_disambiguation": null, 25 | "guid": "214b364a-b419-4b26-8d50-6873bfd9e431", 26 | "weekly_downloads": 9999, 27 | "support_url": null, 28 | "disabled_by_user": false, 29 | "paypal_id": "", 30 | "average_rating": 0.0, 31 | "wants_contributions": false, 32 | "average_daily_users": 0, 33 | "bayesian_rating": 0.0, 34 | "share_count": 0, 35 | "ts_slowness": null, 36 | "homepage": null, 37 | "support_email": null, 38 | "public_stats": false, 39 | "status": 4, 40 | "privacy_policy": 2326784, 41 | "description": null, 42 | "default_locale": "en-US", 43 | "target_locale": null, 44 | "suggested_amount": null, 45 | "get_satisfaction_product": null, 46 | "prerelease": false, 47 | "thankyou_note": null, 48 | "admin_review": false, 49 | "auto_repackage": true, 50 | "slug": "app-337141", 51 | "external_software": false, 52 | "highest_status": 4, 53 | "get_satisfaction_company": null, 54 | "name": 2425897, 55 | "created": "2011-10-18 16:28:24", 56 | "type": 11, 57 | "icon_type": "image/png", 58 | "annoying": 0, 59 | "modified": 
"2011-10-18 16:29:46", 60 | "summary": 2425898, 61 | "nomination_message": null, 62 | "site_specific": false, 63 | "charity": null, 64 | "total_reviews": 0, 65 | "the_reason": null, 66 | "hotness": 0.0, 67 | "latest_version": 1268829 68 | } 69 | }, 70 | { 71 | "pk": 1268829, 72 | "model": "versions.version", 73 | "fields": { 74 | "has_info_request": false, 75 | "license": null, 76 | "created": "2011-10-18 16:28:24", 77 | "has_editor_comment": false, 78 | "releasenotes": null, 79 | "approvalnotes": "", 80 | "modified": "2011-10-18 16:28:24", 81 | "version": "1.0", 82 | "version_int": 1000000200100, 83 | "reviewed": null, 84 | "nomination": null, 85 | "addon": 337141 86 | } 87 | }, 88 | { 89 | "pk": 1, 90 | "model": "files.platform", 91 | "fields": { 92 | "modified": "2008-04-07 08:16:55", 93 | "created": "2007-03-05 13:09:27" 94 | } 95 | }, 96 | { 97 | "pk": 81555, 98 | "model": "files.file", 99 | "fields": { 100 | "status": 4, 101 | "codereview": false, 102 | "hash": "sha256:3808b13ef8341378b9c8305ca648200954ee7dcd8dce09fef55f2673458bc31f", 103 | "created": "2009-10-21 09:58:52", 104 | "modified": "2009-10-21 10:02:38", 105 | "filename": "steamcube.webapp", 106 | "platform": 1, 107 | "version": 1268829, 108 | "size": 388096, 109 | "datestatuschanged": "2009-10-21 09:58:40" 110 | } 111 | }, 112 | { 113 | "pk": 2527085, 114 | "model": "translations.translation", 115 | "fields": { 116 | "localized_string_clean": null, 117 | "created": "2011-10-18 16:28:24", 118 | "locale": "en-US", 119 | "modified": "2011-10-18 16:28:57", 120 | "id": 2425897, 121 | "localized_string": "Something Something Steamcube!" 122 | } 123 | }, 124 | { 125 | "pk": 2527086, 126 | "model": "translations.translation", 127 | "fields": { 128 | "localized_string_clean": "A simple 2.5D brain teaser block puzzle game. 
Find out how far can you get before time runs out?", 129 | "created": "2011-10-18 16:28:24", 130 | "locale": "en-US", 131 | "modified": "2011-10-18 16:28:57", 132 | "id": 2425898, 133 | "localized_string": "A simple 2.5D brain teaser block puzzle game. Find out how far can you get before time runs out?" 134 | } 135 | }, 136 | { 137 | "pk": 2527087, 138 | "model": "translations.translation", 139 | "fields": { 140 | "localized_string_clean": "", 141 | "created": "2011-10-18 16:28:57", 142 | "locale": "en-US", 143 | "modified": "2011-10-18 16:28:57", 144 | "id": 2425899, 145 | "localized_string": "" 146 | } 147 | }, 148 | { 149 | "pk": 2425898, 150 | "model": "translations.translation", 151 | "fields": { 152 | "localized_string_clean": "", 153 | "created": "2011-07-26 14:16:26", 154 | "locale": "en-US", 155 | "modified": "2011-07-26 14:16:26", 156 | "id": 2326782, 157 | "localized_string": null 158 | } 159 | }, 160 | { 161 | "pk": 2425900, 162 | "model": "translations.translation", 163 | "fields": { 164 | "localized_string": "We sell your ish \r\nhttp://omg.org/yes", 165 | "created": "2007-04-05 08:08:48", 166 | "locale": "en-US", 167 | "modified": "2009-03-26 07:41:10", 168 | "id": 2326784, 169 | "localized_string_clean": null 170 | } 171 | }, 172 | 173 | { 174 | "pk": 31337, 175 | "model": "auth.user", 176 | "fields": { 177 | "username": "steamcube@mozilla.com", 178 | "first_name": "Leet", 179 | "last_name": "User", 180 | "is_active": true, 181 | "is_superuser": false, 182 | "is_staff": false, 183 | "last_login": "2010-05-20 13:00:37", 184 | "groups": [], 185 | "user_permissions": [], 186 | "password": "", 187 | "email": "steamcube@mozilla.com", 188 | "date_joined": "2007-03-05 13:09:56" 189 | } 190 | }, 191 | { 192 | "pk": 31337, 193 | "model": "users.userprofile", 194 | "fields": { 195 | "display_collections_fav": false, 196 | "display_collections": false, 197 | "averagerating": "3.11", 198 | "confirmationcode": "", 199 | "notifycompat": true, 200 | 
"picture_type": "", 201 | "occupation": "", 202 | "homepage": "", 203 | "email": "steamcube@mozilla.com", 204 | "location": "", 205 | "bio": null, 206 | "deleted": false, 207 | "emailhidden": true, 208 | "user": 31337, 209 | "password": "sha512$7b5436061f8c0902088c292c057be69fdb17312e2f71607c9c51641f5d876522$08d1d370d89e2ae92755fd03464a7276ca607c431d04a52d659f7a184f3f9918073637d82fc88981c7099c7c46a1137b9fdeb675304eb98801038905a9ee0600", 210 | "username": "31337", 211 | "display_name": "31337 \u0627\u0644\u062a\u0637\u0628", 212 | "resetcode_expires": null, 213 | "resetcode": "", 214 | "created": "2007-03-05 13:09:56", 215 | "notes": null, 216 | "modified": "2010-05-19 16:41:22", 217 | "notifyevents": true 218 | } 219 | }, 220 | { 221 | "pk": 2818, 222 | "model": "addons.addonuser", 223 | "fields": { 224 | "position": 0, 225 | "addon": 337141, 226 | "role": 5, 227 | "listed": true, 228 | "user": 31337 229 | } 230 | } 231 | ] 232 | -------------------------------------------------------------------------------- /examples/exceptions.py: -------------------------------------------------------------------------------- 1 | """ 2 | A grammar for parsing a tiny HTML-like language, plus a transformer for it. 3 | """ 4 | from parsley import makeGrammar, term, termMaker as t, unwrapGrammar 5 | from itertools import chain 6 | 7 | tinyHTMLGrammar = r""" 8 | 9 | name = 10 | 11 | tag = ((('<' spaces name:n spaces attribute*:attrs '>') 12 | html:c 13 | ('<' '/' token(n) spaces '>') 14 | -> t.Element(n.lower(), dict(attrs), c))) ^ (valid tag) 15 | 16 | html = (text | tag)* 17 | 18 | text = <(~('<') anything)+> 19 | 20 | attribute = spaces name:k token('=') quotedString:v -> (k, v) 21 | 22 | quotedString = ((('"' | '\''):q <(~exactly(q) anything)*>:xs exactly(q)) 23 | -> xs) 24 | 25 | """ 26 | TinyHTML = makeGrammar(tinyHTMLGrammar, globals(), name="TinyHTML") 27 | 28 | testSource = "Yes

Man, HTML is great.

How could you even think otherwise?

A Good Website" 29 | 30 | print(unwrapGrammar(TinyHTML)(testSource).apply('tag')) 31 | 32 | # The "tag" rule uses the custom label construct "^ (valid tag)". 33 | # When this rule fails, the exception raised will say 34 | # "expected a valid tag". 35 | # 36 | # Yes

Man, HTML is great.

How could you even think otherwise?

A Good Website 37 | # ^ 38 | # Parse error at line 1, column 5: expected a valid tag. trail: [name attribute tag] 39 | -------------------------------------------------------------------------------- /examples/iso8601.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import pytz 3 | 4 | from parsley import makeGrammar 5 | 6 | # See www.ietf.org/rfc/rfc3339.txt 7 | 8 | _iso_8601_definition = r""" 9 | year = :Y -> int(Y) 10 | month = :m -> int(m) 11 | day = :d -> int(d) 12 | 13 | hour = :H -> int(H) 14 | minute = :M -> int(M) 15 | second = :S -> int(S) 16 | fraction = '.' :frac -> int(float('0.' + frac) * 10 ** 6) 17 | 18 | sign = ('-' -> -1) | ('+' -> 1) 19 | numeric_offset = sign:s hour:h ':' minute:m -> FixedOffset(s * (h * 60 + m)) 20 | utc = 'Z' -> UTC 21 | offset = utc | numeric_offset 22 | 23 | naive_time = hour:h ':' minute:m ':' second:s (fraction | -> 0):ms 24 | -> time(h, m, s, ms) 25 | time = naive_time:t offset:o -> t.replace(tzinfo=o) 26 | date = year:y '-' month:m '-' day:d -> date(y, m, d) 27 | 28 | datetime = date:d 'T' time:t -> datetime.combine(d, t) 29 | """ 30 | 31 | 32 | DateTimeParser = makeGrammar( 33 | _iso_8601_definition, 34 | { 35 | 'FixedOffset': pytz.FixedOffset, 36 | 'date': datetime.date, 37 | 'time': datetime.time, 38 | 'datetime': datetime.datetime, 39 | 'UTC': pytz.UTC, 40 | }, 41 | ) 42 | -------------------------------------------------------------------------------- /examples/minml.py: -------------------------------------------------------------------------------- 1 | """ 2 | A grammar for parsing a tiny HTML-like language, plus a transformer for it. 
3 | """ 4 | from parsley import makeGrammar, term, termMaker as t 5 | from itertools import chain 6 | 7 | tinyHTMLGrammar = r""" 8 | 9 | name = 10 | 11 | tag = ('<' spaces name:n spaces attribute*:attrs '>' 12 | html:c 13 | '<' '/' token(n) spaces '>' 14 | -> t.Element(n.lower(), dict(attrs), c)) 15 | 16 | html = (text | tag)* 17 | 18 | text = <(~('<') anything)+> 19 | 20 | attribute = spaces name:k token('=') quotedString:v -> (k, v) 21 | 22 | quotedString = (('"' | '\''):q <(~exactly(q) anything)*>:xs exactly(q)) 23 | -> xs 24 | 25 | """ 26 | TinyHTML = makeGrammar(tinyHTMLGrammar, globals(), name="TinyHTML") 27 | 28 | testSource = "Yes

Man, HTML is great.

How could you even think otherwise?

A Good Website" 29 | 30 | print(TinyHTML(testSource).html()) 31 | -------------------------------------------------------------------------------- /examples/parsley_json.py: -------------------------------------------------------------------------------- 1 | from parsley import makeGrammar 2 | jsonGrammar = r""" 3 | ws = (' ' | '\r' | '\n' | '\t')* 4 | object = ws '{' members:m ws '}' ws -> dict(m) 5 | members = (pair:first (ws ',' pair)*:rest -> [first] + rest) | -> [] 6 | pair = ws string:k ws ':' value:v -> (k, v) 7 | array = '[' elements:xs ws ']' -> xs 8 | elements = (value:first (ws ',' value)*:rest -> [first] + rest) | -> [] 9 | value = ws (string | number | object | array 10 | | 'true' -> True 11 | | 'false' -> False 12 | | 'null' -> None) 13 | string = '"' (escapedChar | ~'"' anything)*:c '"' -> ''.join(c) 14 | escapedChar = '\\' (('"' -> '"') |('\\' -> '\\') 15 | |('/' -> '/') |('b' -> '\b') 16 | |('f' -> '\f') |('n' -> '\n') 17 | |('r' -> '\r') |('t' -> '\t') 18 | |('\'' -> '\'') | escapedUnicode) 19 | hexdigit = :x ?(x in '0123456789abcdefABCDEF') -> x 20 | escapedUnicode = 'u' :hs -> unichr(int(hs, 16)) 21 | number = ('-' | -> ''):sign (intPart:ds (floatPart(sign ds) 22 | | -> int(sign + ds))) 23 | digit = :x ?(x in '0123456789') -> x 24 | digits = 25 | digit1_9 = :x ?(x in '123456789') -> x 26 | intPart = (digit1_9:first digits:rest -> first + rest) | digit 27 | floatPart :sign :ds = <('.' digits exponent?) | exponent>:tail 28 | -> float(sign + ds + tail) 29 | exponent = ('e' | 'E') ('+' | '-')? 
digits 30 | 31 | top = (object | array) ws 32 | """ 33 | 34 | JSONParser = makeGrammar(jsonGrammar, {}) 35 | -------------------------------------------------------------------------------- /examples/protocol/netstring_reversal.py: -------------------------------------------------------------------------------- 1 | from twisted.internet.defer import Deferred 2 | from twisted.internet.endpoints import TCP4ServerEndpoint 3 | from twisted.internet.protocol import ServerFactory 4 | from twisted.internet.task import react 5 | 6 | from parsley import makeProtocol 7 | from netstrings import grammar, NetstringSender 8 | 9 | 10 | class NetstringReverserReceiver(object): 11 | currentRule = 'receiveNetstring' 12 | 13 | def __init__(self, sender): 14 | self.sender = sender 15 | 16 | def prepareParsing(self, parser): 17 | pass 18 | 19 | def finishParsing(self, reason): 20 | pass 21 | 22 | def netstringReceived(self, string): 23 | self.sender.sendNetstring(string[::-1]) 24 | 25 | 26 | NetstringReverser = makeProtocol( 27 | grammar, NetstringSender, NetstringReverserReceiver) 28 | 29 | 30 | class NetstringReverserFactory(ServerFactory): 31 | protocol = NetstringReverser 32 | 33 | 34 | def main(reactor): 35 | server = TCP4ServerEndpoint(reactor, 1234) 36 | d = server.listen(NetstringReverserFactory()) 37 | d.addCallback(lambda p: Deferred()) 38 | return d 39 | 40 | react(main, []) 41 | -------------------------------------------------------------------------------- /examples/protocol/netstrings.py: -------------------------------------------------------------------------------- 1 | grammar = """ 2 | 3 | nonzeroDigit = digit:x ?(x != '0') 4 | digits = <'0' | nonzeroDigit digit*>:i -> int(i) 5 | 6 | netstring = digits:length ':' :string ',' -> string 7 | 8 | receiveNetstring = netstring:string -> receiver.netstringReceived(string) 9 | 10 | """ 11 | 12 | class NetstringSender(object): 13 | def __init__(self, transport): 14 | self.transport = transport 15 | 16 | def 
sendNetstring(self, string): 17 | self.transport.write('%d:%s,' % (len(string), string)) 18 | -------------------------------------------------------------------------------- /examples/protocol/test_netstrings.py: -------------------------------------------------------------------------------- 1 | import parsley 2 | import pytest 3 | import netstrings 4 | 5 | proto_helpers = pytest.importorskip('twisted.test.proto_helpers') 6 | StringTransport = proto_helpers.StringTransport 7 | 8 | 9 | netstringGrammar = parsley.makeGrammar(netstrings.grammar, {}) 10 | 11 | def stringParserFromRule(rule): 12 | def parseString(s): 13 | return getattr(netstringGrammar(s), rule)() 14 | return parseString 15 | 16 | def test_digits_parsing(): 17 | parse = stringParserFromRule('digits') 18 | 19 | assert parse('0') == 0 20 | assert parse('1') == 1 21 | assert parse('1234567890') == 1234567890 22 | with pytest.raises(parsley.ParseError): 23 | parse('01') 24 | with pytest.raises(parsley.ParseError): 25 | parse('0001') 26 | 27 | def test_netstring_parsing(): 28 | parse = stringParserFromRule('netstring') 29 | 30 | assert parse('0:,') == '' 31 | assert parse('1:x,') == 'x' 32 | assert parse('10:abcdefghij,') == 'abcdefghij' 33 | 34 | 35 | def build_testing_sender(): 36 | transport = StringTransport() 37 | sender = netstrings.NetstringSender(transport) 38 | return sender, transport 39 | 40 | def test_sending_empty_netstring(): 41 | sender, transport = build_testing_sender() 42 | sender.sendNetstring('') 43 | assert transport.value() == '0:,' 44 | 45 | def test_sending_one_netstring(): 46 | sender, transport = build_testing_sender() 47 | sender.sendNetstring('foobar') 48 | assert transport.value() == '6:foobar,' 49 | 50 | def test_sending_two_netstrings(): 51 | sender, transport = build_testing_sender() 52 | sender.sendNetstring('spam') 53 | sender.sendNetstring('egggs') 54 | assert transport.value() == '4:spam,5:egggs,' 55 | 56 | 57 | class FakeReceiver(object): 58 | currentRule = 
'receiveNetstring' 59 | 60 | def __init__(self, sender): 61 | self.sender = sender 62 | self.netstrings = [] 63 | self.connected = False 64 | self.lossReason = None 65 | 66 | def netstringReceived(self, s): 67 | self.netstrings.append(s) 68 | 69 | def prepareParsing(self, parser): 70 | self.connected = True 71 | 72 | def finishParsing(self, reason): 73 | self.lossReason = reason 74 | 75 | TestingNetstringProtocol = parsley.makeProtocol( 76 | netstrings.grammar, netstrings.NetstringSender, FakeReceiver) 77 | 78 | def build_testing_protocol(): 79 | protocol = TestingNetstringProtocol() 80 | transport = StringTransport() 81 | protocol.makeConnection(transport) 82 | return protocol, transport 83 | 84 | def test_receiving_empty_netstring(): 85 | protocol, transport = build_testing_protocol() 86 | protocol.dataReceived('0:,') 87 | assert protocol.receiver.netstrings == [''] 88 | 89 | def test_receiving_one_netstring_by_byte(): 90 | protocol, transport = build_testing_protocol() 91 | for c in '4:spam,': 92 | protocol.dataReceived(c) 93 | assert protocol.receiver.netstrings == ['spam'] 94 | 95 | def test_receiving_two_netstrings_by_byte(): 96 | protocol, transport = build_testing_protocol() 97 | for c in '4:spam,4:eggs,': 98 | protocol.dataReceived(c) 99 | assert protocol.receiver.netstrings == ['spam', 'eggs'] 100 | 101 | def test_receiving_two_netstrings_in_chunks(): 102 | protocol, transport = build_testing_protocol() 103 | for c in ['4:', 'spa', 'm,4', ':eg', 'gs,']: 104 | protocol.dataReceived(c) 105 | assert protocol.receiver.netstrings == ['spam', 'eggs'] 106 | 107 | def test_receiving_two_netstrings_at_once(): 108 | protocol, transport = build_testing_protocol() 109 | protocol.dataReceived('4:spam,4:eggs,') 110 | assert protocol.receiver.netstrings == ['spam', 'eggs'] 111 | 112 | def test_establishing_connection(): 113 | assert not FakeReceiver(None).connected 114 | protocol, transport = build_testing_protocol() 115 | assert protocol.receiver.connected 116 | 117 | 
def test_losing_connection(): 118 | protocol, transport = build_testing_protocol() 119 | reason = object() 120 | protocol.connectionLost(reason) 121 | assert protocol.receiver.lossReason == reason 122 | -------------------------------------------------------------------------------- /examples/test_iso8601.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import unittest 3 | 4 | import pytest 5 | 6 | pytz = pytest.importorskip('pytz') 7 | from iso8601 import DateTimeParser 8 | 9 | 10 | class TestDatetimeParsing(unittest.TestCase): 11 | def test_date(self): 12 | self.assertEqual( 13 | datetime.date(2001, 12, 25), 14 | DateTimeParser('2001-12-25').date()) 15 | 16 | def test_naive_time(self): 17 | self.assertEqual( 18 | datetime.time(13, 59, 43), 19 | DateTimeParser('13:59:43').naive_time()) 20 | 21 | def test_fractional_naive_time(self): 22 | self.assertEqual( 23 | datetime.time(13, 59, 43, 880000), 24 | DateTimeParser('13:59:43.88').naive_time()) 25 | 26 | def test_utc_time(self): 27 | self.assertEqual( 28 | datetime.time(13, 59, 43, tzinfo=pytz.UTC), 29 | DateTimeParser('13:59:43Z').time()) 30 | 31 | def test_fractional_utc_time(self): 32 | self.assertEqual( 33 | datetime.time(13, 59, 43, 880000, tzinfo=pytz.UTC), 34 | DateTimeParser('13:59:43.88Z').time()) 35 | 36 | def test_timezone_time(self): 37 | self.assertEqual( 38 | datetime.time(13, 59, 43, tzinfo=pytz.FixedOffset(60)), 39 | DateTimeParser('13:59:43+01:00').time()) 40 | 41 | def test_fractional_timezone_time(self): 42 | self.assertEqual( 43 | datetime.time(13, 59, 43, 770000, tzinfo=pytz.FixedOffset(60)), 44 | DateTimeParser('13:59:43.77+01:00').time()) 45 | 46 | def test_numeric_offset(self): 47 | get_offset = lambda x: DateTimeParser(x).numeric_offset() 48 | self.assertEqual(pytz.FixedOffset(0), get_offset('+00:00')) 49 | self.assertEqual(pytz.FixedOffset(90), get_offset('+01:30')) 50 | self.assertEqual(pytz.FixedOffset(-150), 
get_offset('-02:30')) 51 | 52 | def test_datetime(self): 53 | self.assertEqual( 54 | datetime.datetime( 55 | 2001, 12, 25, 13, 59, 43, 770000, tzinfo=pytz.UTC), 56 | DateTimeParser('2001-12-25T13:59:43.77Z').datetime()) 57 | -------------------------------------------------------------------------------- /examples/test_parsley_json.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from parsley_json import JSONParser 4 | import unittest 5 | 6 | class JSONParserTests(unittest.TestCase): 7 | 8 | 9 | def test_integer(self): 10 | self.assertEqual(JSONParser("123").number(), 123) 11 | self.assertEqual(JSONParser("-123").number(), -123) 12 | self.assertEqual(JSONParser("0").number(), 0) 13 | 14 | def test_float(self): 15 | self.assertEqual(JSONParser("0.5").number(), 0.5) 16 | self.assertEqual(JSONParser("1.0").number(), 1.0) 17 | self.assertEqual(JSONParser("-3.5").number(), -3.5) 18 | self.assertEqual(JSONParser("2e7").number(), 2e7) 19 | self.assertEqual(JSONParser("1.2E6").number(), 1.2E6) 20 | 21 | def test_string(self): 22 | self.assertEqual(JSONParser('u2603').escapedUnicode(), "\u2603") 23 | self.assertEqual(JSONParser('"foo"').string(), "foo") 24 | self.assertEqual(JSONParser(r'"foo\n"').string(), "foo\n") 25 | self.assertEqual(JSONParser(r'"foo\rbaz\u2603"').string(), "foo\rbaz\u2603") 26 | self.assertEqual(JSONParser(r'"\\\/\b\"\f\t"').string(), '\\/\b"\f\t') 27 | 28 | def test_literals(self): 29 | self.assertEqual(JSONParser(r'true').value(), True) 30 | self.assertEqual(JSONParser(r'false').value(), False) 31 | self.assertEqual(JSONParser(r'null').value(), None) 32 | 33 | def test_array(self): 34 | self.assertEqual(JSONParser(r'[1, 2]').array(), [1, 2]) 35 | self.assertEqual(JSONParser(r'["foo", []]').array(), ["foo", []]) 36 | 37 | def test_object(self): 38 | self.assertEqual(JSONParser(r'{"foo": 1}').object(), {"foo": 1}) 39 | self.assertEqual(JSONParser(r'{"foo": "baz", 
"x": {}}').object(), 40 | {"foo": "baz", "x": {}}) 41 | 42 | -------------------------------------------------------------------------------- /examples/trace_json.py: -------------------------------------------------------------------------------- 1 | from parsley import makeGrammar 2 | from parsley_json import jsonGrammar 3 | 4 | 5 | def traceparse(jsonData): 6 | trace = [] 7 | def traceit(*a): 8 | trace.append(a) 9 | JSONParser = makeGrammar(jsonGrammar, {}, 10 | tracefunc=traceit) 11 | return JSONParser(jsonData).top(), trace 12 | -------------------------------------------------------------------------------- /examples/trace_visualiser.py: -------------------------------------------------------------------------------- 1 | from tkinter.scrolledtext import ScrolledText 2 | import tkinter as tk 3 | 4 | from trace_json import traceparse 5 | from parsley_json import jsonGrammar 6 | 7 | jsonData = open('337141-steamcube.json').read() 8 | 9 | 10 | class Tracer(object): 11 | 12 | def __init__(self, grammarWin, inputWin, logWin, trace): 13 | self.grammarWin = grammarWin 14 | self.inputWin = inputWin 15 | self.logWin = logWin 16 | self.trace = trace 17 | self.position = 0 18 | 19 | def advance(self): 20 | if self.position < len(self.trace): 21 | self.position += 1 22 | self.display() 23 | 24 | def rewind(self): 25 | if self.position > 0: 26 | self.position -= 1 27 | self.display() 28 | 29 | def display(self): 30 | def updateHighlight(w, start, end=None): 31 | w.tag_remove("highlight", "1.0", tk.END) 32 | start = "1.0+%sc" % (start,) 33 | if end is not None: 34 | end = "1.0+%sc" % (end,) 35 | w.tag_add("highlight", start, end) 36 | w.tag_configure("highlight", background="yellow") 37 | 38 | _, (grammarStart, grammarEnd), inputPos = self.trace[self.position] 39 | updateHighlight(self.grammarWin, grammarStart, grammarEnd) 40 | updateHighlight(self.inputWin, inputPos) 41 | 42 | 43 | def display(grammar, src, trace): 44 | r = tk.Tk() 45 | f = tk.Frame(master=r) 46 | lt = 
ScrolledText(master=f) 47 | rt = ScrolledText(master=f) 48 | lt.pack(side="left", expand=True, fill="both") 49 | rt.pack(side="right", expand=True, fill="both") 50 | 51 | bot = ScrolledText(master=r, height=5) 52 | tracer = Tracer(lt, rt, bot, trace) 53 | toolbar = tk.Frame(master=r) 54 | tk.Button(toolbar, text="Next", width=5, command=tracer.advance).pack( 55 | side="left") 56 | tk.Button(toolbar, text="Prev", width=5, command=tracer.rewind).pack( 57 | side="left") 58 | f.pack(expand=1, fill="both") 59 | toolbar.pack(fill=tk.X) 60 | bot.pack(fill=tk.X) 61 | 62 | lt.insert(tk.END, grammar) 63 | rt.insert(tk.END, src) 64 | tracer.display() 65 | return r 66 | 67 | _, trace = traceparse(jsonData) 68 | root = display(jsonGrammar, jsonData, trace) 69 | 70 | root.mainloop() 71 | -------------------------------------------------------------------------------- /ometa/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyga/parsley/c89b3f0e09a9501f285a14ae446a77a56ee99942/ometa/__init__.py -------------------------------------------------------------------------------- /ometa/_generated/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyga/parsley/c89b3f0e09a9501f285a14ae446a77a56ee99942/ometa/_generated/__init__.py -------------------------------------------------------------------------------- /ometa/_generated/parsley_termactions.py: -------------------------------------------------------------------------------- 1 | def createParserClass(GrammarBase, ruleGlobals): 2 | if ruleGlobals is None: 3 | ruleGlobals = {} 4 | class parsley_termactions(GrammarBase): 5 | def rule_ruleValue(self): 6 | _locals = {'self': self} 7 | self.locals['ruleValue'] = _locals 8 | self._trace(' ws', (11, 14), self.input.position) 9 | _G_apply_1, lastError = self._apply(self.rule_ws, "ws", []) 10 | self.considerError(lastError, 'ruleValue') 11 
| self._trace(" '->'", (14, 19), self.input.position) 12 | _G_exactly_2, lastError = self.exactly('->') 13 | self.considerError(lastError, 'ruleValue') 14 | self._trace(' term', (19, 24), self.input.position) 15 | _G_apply_3, lastError = self._apply(self.rule_term, "term", []) 16 | self.considerError(lastError, 'ruleValue') 17 | _locals['tt'] = _G_apply_3 18 | _G_python_4, lastError = eval('t.Action(tt)', self.globals, _locals), None 19 | self.considerError(lastError, 'ruleValue') 20 | return (_G_python_4, self.currentError) 21 | 22 | 23 | def rule_semanticPredicate(self): 24 | _locals = {'self': self} 25 | self.locals['semanticPredicate'] = _locals 26 | self._trace(' ws', (64, 67), self.input.position) 27 | _G_apply_5, lastError = self._apply(self.rule_ws, "ws", []) 28 | self.considerError(lastError, 'semanticPredicate') 29 | self._trace(" '?('", (67, 72), self.input.position) 30 | _G_exactly_6, lastError = self.exactly('?(') 31 | self.considerError(lastError, 'semanticPredicate') 32 | self._trace(' term', (72, 77), self.input.position) 33 | _G_apply_7, lastError = self._apply(self.rule_term, "term", []) 34 | self.considerError(lastError, 'semanticPredicate') 35 | _locals['tt'] = _G_apply_7 36 | self._trace(' ws', (80, 83), self.input.position) 37 | _G_apply_8, lastError = self._apply(self.rule_ws, "ws", []) 38 | self.considerError(lastError, 'semanticPredicate') 39 | self._trace(" ')'", (83, 87), self.input.position) 40 | _G_exactly_9, lastError = self.exactly(')') 41 | self.considerError(lastError, 'semanticPredicate') 42 | _G_python_10, lastError = eval('t.Predicate(tt)', self.globals, _locals), None 43 | self.considerError(lastError, 'semanticPredicate') 44 | return (_G_python_10, self.currentError) 45 | 46 | 47 | def rule_semanticAction(self): 48 | _locals = {'self': self} 49 | self.locals['semanticAction'] = _locals 50 | self._trace(' ws', (124, 127), self.input.position) 51 | _G_apply_11, lastError = self._apply(self.rule_ws, "ws", []) 52 | 
self.considerError(lastError, 'semanticAction') 53 | self._trace(" '!('", (127, 132), self.input.position) 54 | _G_exactly_12, lastError = self.exactly('!(') 55 | self.considerError(lastError, 'semanticAction') 56 | self._trace(' term', (132, 137), self.input.position) 57 | _G_apply_13, lastError = self._apply(self.rule_term, "term", []) 58 | self.considerError(lastError, 'semanticAction') 59 | _locals['tt'] = _G_apply_13 60 | self._trace(' ws', (140, 143), self.input.position) 61 | _G_apply_14, lastError = self._apply(self.rule_ws, "ws", []) 62 | self.considerError(lastError, 'semanticAction') 63 | self._trace(" ')'", (143, 147), self.input.position) 64 | _G_exactly_15, lastError = self.exactly(')') 65 | self.considerError(lastError, 'semanticAction') 66 | _G_python_16, lastError = eval('t.Action(tt)', self.globals, _locals), None 67 | self.considerError(lastError, 'semanticAction') 68 | return (_G_python_16, self.currentError) 69 | 70 | 71 | def rule_application(self): 72 | _locals = {'self': self} 73 | self.locals['application'] = _locals 74 | def _G_optional_17(): 75 | self._trace(' indentation', (178, 190), self.input.position) 76 | _G_apply_18, lastError = self._apply(self.rule_indentation, "indentation", []) 77 | self.considerError(lastError, None) 78 | return (_G_apply_18, self.currentError) 79 | def _G_optional_19(): 80 | return (None, self.input.nullError()) 81 | _G_or_20, lastError = self._or([_G_optional_17, _G_optional_19]) 82 | self.considerError(lastError, 'application') 83 | self._trace(' name', (191, 196), self.input.position) 84 | _G_apply_21, lastError = self._apply(self.rule_name, "name", []) 85 | self.considerError(lastError, 'application') 86 | _locals['name'] = _G_apply_21 87 | def _G_or_22(): 88 | self._trace("'('", (221, 224), self.input.position) 89 | _G_exactly_23, lastError = self.exactly('(') 90 | self.considerError(lastError, None) 91 | self._trace(' term_arglist', (224, 237), self.input.position) 92 | _G_apply_24, lastError = 
self._apply(self.rule_term_arglist, "term_arglist", []) 93 | self.considerError(lastError, None) 94 | _locals['args'] = _G_apply_24 95 | self._trace(" ')'", (242, 246), self.input.position) 96 | _G_exactly_25, lastError = self.exactly(')') 97 | self.considerError(lastError, None) 98 | _G_python_26, lastError = eval('t.Apply(name, self.rulename, args)', self.globals, _locals), None 99 | self.considerError(lastError, None) 100 | return (_G_python_26, self.currentError) 101 | def _G_or_27(): 102 | _G_python_28, lastError = eval('t.Apply(name, self.rulename, [])', self.globals, _locals), None 103 | self.considerError(lastError, None) 104 | return (_G_python_28, self.currentError) 105 | _G_or_29, lastError = self._or([_G_or_22, _G_or_27]) 106 | self.considerError(lastError, 'application') 107 | return (_G_or_29, self.currentError) 108 | 109 | 110 | if parsley_termactions.globals is not None: 111 | parsley_termactions.globals = parsley_termactions.globals.copy() 112 | parsley_termactions.globals.update(ruleGlobals) 113 | else: 114 | parsley_termactions.globals = ruleGlobals 115 | return parsley_termactions -------------------------------------------------------------------------------- /ometa/compat.py: -------------------------------------------------------------------------------- 1 | import ometa 2 | from ometa.runtime import OMetaGrammarBase 3 | from ometa.grammar import OMeta 4 | from ometa.grammar import loadGrammar 5 | from terml.nodes import termMaker as t 6 | 7 | OMeta1 = loadGrammar(ometa, "pymeta_v1", 8 | globals(), OMetaGrammarBase) 9 | -------------------------------------------------------------------------------- /ometa/grammar.py: -------------------------------------------------------------------------------- 1 | # -*- test-case-name: ometa.test.test_pymeta -*- 2 | """ 3 | Public interface to OMeta, as well as the grammars used to compile grammar 4 | definitions. 
5 | """ 6 | import os.path 7 | import string 8 | try: 9 | from StringIO import StringIO 10 | except ImportError: 11 | from io import StringIO 12 | 13 | from terml.nodes import termMaker as t 14 | import ometa 15 | from ometa._generated.parsley import createParserClass as makeBootGrammar 16 | from ometa.builder import TermActionPythonWriter, moduleFromGrammar, TextWriter 17 | from ometa.runtime import OMetaBase, OMetaGrammarBase 18 | 19 | OMeta = makeBootGrammar(OMetaGrammarBase, globals()) 20 | 21 | 22 | def loadGrammar(pkg, name, globals, superclass=OMetaBase): 23 | try: 24 | m = __import__('.'.join([pkg.__name__, '_generated', name]), 25 | fromlist=[name], level=0) 26 | except ImportError: 27 | base = os.path.dirname(os.path.abspath(pkg.__file__)) 28 | src = open(os.path.join(base, name + ".parsley")).read() 29 | m = OMeta.makeGrammar(src, name) 30 | 31 | return m.createParserClass(superclass, globals) 32 | 33 | class TermOMeta(loadGrammar( 34 | ometa, "parsley_termactions", 35 | globals(), superclass=OMeta)): 36 | 37 | _writer = TermActionPythonWriter 38 | 39 | @classmethod 40 | def makeGrammar(cls, grammar, name): 41 | """ 42 | Define a new parser class with the rules in the given grammar. 43 | 44 | @param grammar: A string containing a PyMeta grammar. 45 | @param globals: A dict of names that should be accessible by this 46 | grammar. 47 | @param name: The name of the class to be generated. 48 | @param superclass: The class the generated class is a child of. 
49 | """ 50 | g = cls(grammar) 51 | tree = g.parseGrammar(name) 52 | modname = "pymeta_grammar__" + name 53 | filename = "/pymeta_generated_code/" + modname + ".py" 54 | source = g.writeTerm(tree, grammar) 55 | return moduleFromGrammar(source, name, modname, filename) 56 | 57 | def writeTerm(self, term, grammar): 58 | f = StringIO() 59 | pw = self._writer(term, grammar) 60 | out = TextWriter(f) 61 | pw.output(out) 62 | return f.getvalue().strip() 63 | 64 | def rule_term(self): 65 | from terml.parser import TermLParser 66 | tp = TermLParser('') 67 | tp.input = self.input 68 | self.input.setMemo('term', None) 69 | val, err = tp.apply('term') 70 | self.input = tp.input 71 | return val, err 72 | 73 | def rule_term_arglist(self): 74 | from terml.parser import TermLParser 75 | tp = TermLParser('') 76 | tp.input = self.input 77 | val, err = tp.apply('argList') 78 | self.input = tp.input 79 | return val, err 80 | 81 | TreeTransformerGrammar = loadGrammar( 82 | ometa, "parsley_tree_transformer", 83 | globals(), superclass=OMeta) 84 | -------------------------------------------------------------------------------- /ometa/parsley.parsley: -------------------------------------------------------------------------------- 1 | comment = '#' (~'\n' anything)* 2 | hspace = ' ' | '\t' | comment 3 | vspace = '\r\n' | '\r' | '\n' 4 | ws = (hspace | vspace | comment)* 5 | 6 | emptyline = hspace* vspace 7 | indentation = emptyline* hspace+ 8 | noindentation = emptyline* ~~~hspace 9 | 10 | number = ws 11 | ('-' barenumber:x -> t.Exactly(-x, span=self.getSpan()) 12 | |barenumber:x -> t.Exactly(x, span=self.getSpan())) 13 | barenumber = '0' (('x'|'X') :hs -> int(hs, 16) 14 | |:ds -> int(ds, 8)) 15 | |:ds -> int(ds) 16 | octaldigit = :x ?(x in '01234567' ) -> x 17 | hexdigit = :x ?(x in '0123456789ABCDEFabcdef') -> x 18 | 19 | escapedChar = '\\' ('n' -> "\n" 20 | |'r' -> "\r" 21 | |'t' -> "\t" 22 | |'b' -> "\b" 23 | |'f' -> "\f" 24 | |'"' -> '"' 25 | |'\'' -> "'" 26 | |'x' :d -> chr(int(d, 
16)) 27 | |'\\' -> "\\") 28 | 29 | character = ws '\'' (~'\'' (escapedChar | anything))+:c 30 | ws '\'' -> t.Exactly(''.join(c), span=self.getSpan()) 31 | 32 | string = ws '"' (escapedChar | ~('"') anything)*:c 33 | ws '"' -> t.Token(''.join(c), span=self.getSpan()) 34 | 35 | name = 36 | 37 | args = ('(' !(self.applicationArgs(finalChar=')')):args ')' 38 | -> args 39 | | -> []) 40 | 41 | application = indentation? name:name args:args 42 | -> t.Apply(name, self.rulename, args, span=self.getSpan()) 43 | 44 | foreignApply = indentation? name:grammar_name '.' name:rule_name args:args 45 | -> t.ForeignApply(grammar_name, rule_name, self.rulename, args, span=self.getSpan()) 46 | 47 | traceable = !(self.startSpan()) 48 | ( foreignApply 49 | | application 50 | | ruleValue 51 | | semanticPredicate 52 | | semanticAction 53 | | number:n !(self.isTree()) -> n 54 | | character 55 | | string) 56 | 57 | expr1 = traceable 58 | | ws '(' expr:e ws ')' -> e 59 | | ws '<' expr:e ws '>' 60 | -> t.ConsumedBy(e) 61 | | ws '[' expr?:e ws ']' !(self.isTree()) 62 | -> t.List(e) if e else t.List() 63 | 64 | expr2 = (ws '~' ('~' expr2:e -> t.Lookahead(e) 65 | | expr2:e -> t.Not(e) 66 | ) 67 | |expr1) 68 | 69 | repeatTimes = (barenumber:x -> int(x)) | name 70 | 71 | expr3 = (expr2:e 72 | ('*' -> t.Many(e) 73 | |'+' -> t.Many1(e) 74 | |'?' 
-> t.Optional(e) 75 | |customLabel:l -> t.Label(e, l) 76 | |'{' ws repeatTimes:start ws ( 77 | (',' ws repeatTimes:end ws '}' 78 | -> t.Repeat(start, end, e)) 79 | | ws '}' 80 | -> t.Repeat(start, start, e)) 81 | | -> e 82 | )):r 83 | (':' name:n -> t.Bind(n, r) 84 | | ':(' name:n (',' ws name)*:others ws ')' 85 | (-> [n] + others if others else n):n -> t.Bind(n, r) 86 | | -> r) 87 | |ws ':' name:n 88 | -> t.Bind(n, t.Apply("anything", self.rulename, [])) 89 | 90 | expr4 = expr3+:es -> es[0] if len(es) == 1 else t.And(es) 91 | 92 | expr = expr4:e (ws '|' expr4)*:es 93 | -> t.Or([e] + es) if es else e 94 | 95 | ruleValue = ws '->' -> self.ruleValueExpr(True) 96 | 97 | customLabel = (ws '^' ws '(' <(~')' anything)+>:e ')' -> e) ^ (customLabelException) 98 | 99 | semanticPredicate = ws '?(' -> self.semanticPredicateExpr() 100 | 101 | semanticAction = ws '!(' -> self.semanticActionExpr() 102 | 103 | ruleEnd = ((hspace* vspace+) | end) ^ (rule end) 104 | 105 | rulePart :requiredName = noindentation name:n ?(n == requiredName) 106 | !(setattr(self, "rulename", n)) 107 | expr4?:args 108 | (ws '=' expr:e ruleEnd 109 | -> t.And([args, e]) if args else e 110 | | ruleEnd -> args) 111 | 112 | rule = noindentation ~~(name:n) rulePart(n)+:rs -> t.Rule(n, t.Or(rs)) 113 | 114 | 115 | grammar = rule*:rs ws -> t.Grammar(self.name, self.tree_target, rs) 116 | -------------------------------------------------------------------------------- /ometa/parsley_termactions.parsley: -------------------------------------------------------------------------------- 1 | ruleValue = ws '->' term:tt -> t.Action(tt) 2 | 3 | semanticPredicate = ws '?(' term:tt ws ')' -> t.Predicate(tt) 4 | 5 | semanticAction = ws '!(' term:tt ws ')' -> t.Action(tt) 6 | 7 | application = indentation? 
name:name 8 | ('(' term_arglist:args ')' 9 | -> t.Apply(name, self.rulename, args) 10 | | -> t.Apply(name, self.rulename, [])) 11 | -------------------------------------------------------------------------------- /ometa/parsley_tree_transformer.parsley: -------------------------------------------------------------------------------- 1 | termPattern = indentation? name:name ?(name[0].isupper()) 2 | '(' expr?:patts ')' -> t.TermPattern(name, patts) 3 | 4 | subtransform = ws '@' name:n -> t.Bind(n, t.Apply('transform', self.rulename, [])) 5 | 6 | wide_templatedValue = ws '-->' ' '* wideTemplateBits:contents -> t.StringTemplate(contents) 7 | tall_templatedValue = hspace? '{{{' (' ' | '\t')* vspace? tallTemplateBits:contents '}}}' -> t.StringTemplate(contents) 8 | 9 | tallTemplateBits = (exprHole | tallTemplateText)* 10 | tallTemplateText = <(~('}}}' | '$' | '\r' | '\n') anything | '$' '$')+ vspace*> | vspace 11 | 12 | wideTemplateBits = (exprHole | wideTemplateText)* 13 | wideTemplateText = <(~(vspace | end |'$') anything | '$' '$')+> 14 | 15 | exprHole = '$' name:n -> t.QuasiExprHole(n) 16 | 17 | expr1 = foreignApply 18 | |termPattern 19 | |subtransform 20 | |application 21 | |ruleValue 22 | |wide_templatedValue 23 | |tall_templatedValue 24 | |semanticPredicate 25 | |semanticAction 26 | |number:n !(self.isTree()) -> n 27 | |character 28 | |string 29 | |ws '(' expr?:e ws ')' -> e 30 | |ws '[' expr?:e ws ']' -> t.TermPattern(".tuple.", e or t.And([])) 31 | 32 | grammar = rule*:rs ws -> t.Grammar(self.name, True, rs) 33 | rule = noindentation ~~(name:n) (termRulePart(n)+:rs | rulePart(n)+:rs) -> t.Rule(n, t.Or(rs)) 34 | 35 | termRulePart :requiredName = noindentation !(setattr(self, "rulename", requiredName)) 36 | termPattern:tt ?(tt.args[0].data == requiredName) token("=")? 
class ParserProtocol(Protocol):
    """
    A Twisted ``Protocol`` subclass for parsing stream protocols.
    """

    def __init__(self, grammar, senderFactory, receiverFactory, bindings):
        """
        Initialize the parser.

        :param grammar: An OMeta grammar to use for parsing.
        :param senderFactory: A unary callable that returns a sender given a
            transport.
        :param receiverFactory: A unary callable that returns a receiver given
            a sender.
        :param bindings: A dict of additional globals for the grammar rules.
        """
        self._grammar = grammar
        self._bindings = dict(bindings)
        self._senderFactory = senderFactory
        self._receiverFactory = receiverFactory
        self._disconnecting = False

    def connectionMade(self):
        """
        Start parsing, since the connection has been established.
        """
        sender = self._senderFactory(self.transport)
        receiver = self._receiverFactory(sender)
        self.sender = sender
        self.receiver = receiver
        receiver.prepareParsing(self)
        self._parser = TrampolinedParser(self._grammar, receiver,
                                         self._bindings)

    def dataReceived(self, data):
        """
        Receive and parse some data.

        :param data: A ``str`` from Twisted.
        """
        if self._disconnecting:
            return
        try:
            self._parser.receive(data)
        except Exception:
            # Report the failure to the receiver, then drop the transport.
            self.connectionLost(Failure())
            self.transport.abortConnection()

    def connectionLost(self, reason):
        """
        Stop parsing, since the connection has been lost.

        :param reason: A ``Failure`` instance from Twisted.
        """
        if self._disconnecting:
            return
        self.receiver.finishParsing(reason)
        self._disconnecting = True
class TestCase(unittest.TestCase):
    """TestCase variant whose ``assertRaises`` returns the caught exception."""

    def assertRaises(self, ex, f, *args, **kwargs):
        """Call ``f(*args, **kwargs)`` and return the raised *ex* instance.

        Fails the test if *f* returns without raising *ex*; any other
        exception type propagates unchanged.
        """
        try:
            f(*args, **kwargs)
        except ex as caught:
            return caught
        assert False, "%r didn't raise %r" % (f, ex)
class ReceiverFactory(object):
    """Grammar-receiver double: records calls and supports rule switching."""

    # Rule the protocol starts parsing with; mutated via returnMap lookups.
    currentRule = 'initial'

    def __init__(self, sender):
        self.sender = sender
        self.calls = []        # values received via __call__, in order
        self.returnMap = {}    # received value -> next rule name
        self.connected = False
        self.lossReason = None
        self.parser = None

    def __call__(self, v):
        """Record *v*; switch the current rule if the map has an entry."""
        self.calls.append(v)
        if v in self.returnMap:
            self.currentRule = self.returnMap[v]

    def prepareParsing(self, parser):
        """Connection-established hook: remember the parser protocol."""
        self.connected = True
        self.parser = parser

    def finishParsing(self, reason):
        """Connection-lost hook: remember why parsing stopped."""
        self.lossReason = reason

    def raiseSomething(self):
        """Hook the grammar calls to trigger an in-rule exception."""
        raise SomeException()
self.protocol.receiver.sender) 84 | 85 | def test_parserPassed(self): 86 | """The parser is passed in the prepareParsing method.""" 87 | self.protocol.makeConnection(None) 88 | self.assertEqual(self.protocol, self.protocol.receiver.parser) 89 | 90 | def test_connectionEstablishes(self): 91 | """prepareParsing is called on the receiver after connection establishment.""" 92 | self.protocol.makeConnection(None) 93 | self.assert_(self.protocol.receiver.connected) 94 | 95 | def test_basicParsing(self): 96 | """Rules can be parsed multiple times for the same effect.""" 97 | self.protocol.makeConnection(None) 98 | self.protocol.dataReceived('aa') 99 | self.assertEqual(self.protocol.receiver.calls, ['a']) 100 | self.protocol.dataReceived('aa') 101 | self.assertEqual(self.protocol.receiver.calls, ['a', 'a']) 102 | 103 | def test_parsingChunks(self): 104 | """Any number of rules can be called from one dataRecived.""" 105 | self.protocol.makeConnection(None) 106 | self.protocol.dataReceived('a') 107 | self.assertEqual(self.protocol.receiver.calls, []) 108 | self.protocol.dataReceived('aa') 109 | self.assertEqual(self.protocol.receiver.calls, ['a']) 110 | self.protocol.dataReceived('aaa') 111 | self.assertEqual(self.protocol.receiver.calls, ['a', 'a', 'a']) 112 | 113 | def test_ruleSwitching(self): 114 | """The rule being parsed can specify the next rule to be parsed.""" 115 | self.protocol.makeConnection(None) 116 | self.protocol.receiver.returnMap.update(dict(a='someB', b='someA')) 117 | self.protocol.dataReceived('aa') 118 | self.assertEqual(self.protocol.receiver.calls, ['a']) 119 | self.protocol.dataReceived('bb') 120 | self.assertEqual(self.protocol.receiver.calls, ['a', 'b']) 121 | self.protocol.dataReceived('aa') 122 | self.assertEqual(self.protocol.receiver.calls, ['a', 'b', 'a']) 123 | 124 | def test_ruleSwitchingWithChunks(self): 125 | """Any number of rules can be called even during rule switching.""" 126 | self.protocol.makeConnection(None) 127 | 
self.protocol.receiver.returnMap.update(dict(a='someB', b='someA')) 128 | self.protocol.dataReceived('a') 129 | self.assertEqual(self.protocol.receiver.calls, []) 130 | self.protocol.dataReceived('ab') 131 | self.assertEqual(self.protocol.receiver.calls, ['a']) 132 | self.protocol.dataReceived('baa') 133 | self.assertEqual(self.protocol.receiver.calls, ['a', 'b', 'a']) 134 | 135 | def test_rulesCannotBeSwitchedDuringParsing(self): 136 | """ 137 | One can set a new rule during parsing, but it won't change the rule 138 | currently being parsed. 139 | """ 140 | self.protocol.makeConnection(None) 141 | self.protocol.dataReceived('aa') 142 | self.assertEqual(self.protocol.receiver.calls, ['a']) 143 | self.protocol.dataReceived('a') 144 | self.assertEqual(self.protocol.receiver.calls, ['a']) 145 | self.protocol.receiver.currentRule = 'someC' 146 | self.protocol.dataReceived('acc') 147 | self.assertEqual(self.protocol.receiver.calls, ['a', 'a', 'c']) 148 | 149 | def test_connectionLoss(self): 150 | """The reason for connection loss is forwarded to the receiver.""" 151 | self.protocol.makeConnection(None) 152 | reason = object() 153 | self.protocol.connectionLost(reason) 154 | self.assertEqual(self.protocol.receiver.lossReason, reason) 155 | 156 | def test_parseFailure(self): 157 | """ 158 | Parse failures cause connection abortion with the parse error as the 159 | reason. 160 | """ 161 | transport = FakeTransport() 162 | self.protocol.makeConnection(transport) 163 | self.protocol.dataReceived('b') 164 | self.failIfEqual(self.protocol.receiver.lossReason, None) 165 | self.assertTrue( 166 | isinstance(self.protocol.receiver.lossReason.value, ParseError)) 167 | self.assert_(transport.aborted) 168 | 169 | def test_exceptionsRaisedFromReceiver(self): 170 | """ 171 | Raising an exception from receiver methods called from the grammar 172 | propagate to finishParsing. 
173 | """ 174 | transport = FakeTransport() 175 | self.protocol.makeConnection(transport) 176 | self.protocol.dataReceived('e') 177 | self.failIfEqual(self.protocol.receiver.lossReason, None) 178 | self.assertTrue( 179 | isinstance(self.protocol.receiver.lossReason.value, SomeException)) 180 | self.assert_(transport.aborted) 181 | 182 | def test_dataIgnoredAfterDisconnection(self): 183 | """After connectionLost is called, all incoming data is ignored.""" 184 | transport = FakeTransport() 185 | self.protocol.makeConnection(transport) 186 | reason = object() 187 | self.protocol.connectionLost(reason) 188 | self.protocol.dataReceived('d') 189 | self.assertEqual(self.protocol.receiver.lossReason, reason) 190 | self.assert_(not transport.aborted) 191 | -------------------------------------------------------------------------------- /ometa/test/test_runtime.py: -------------------------------------------------------------------------------- 1 | from ometa.runtime import OMetaBase, ParseError, expected, eof 2 | from ometa.test.helpers import TestCase 3 | 4 | class RuntimeTests(TestCase): 5 | """ 6 | Tests for L{pymeta.runtime}. 7 | """ 8 | 9 | def test_anything(self): 10 | """ 11 | L{OMetaBase.rule_anything} returns each item from the input 12 | along with its position. 13 | """ 14 | 15 | data = "foo" 16 | o = OMetaBase(data) 17 | 18 | for i, c in enumerate(data): 19 | v, e = o.rule_anything() 20 | self.assertEqual((c, i), (v, e.args[0])) 21 | 22 | 23 | def test_exactly(self): 24 | """ 25 | L{OMetaBase.rule_exactly} returns the requested item from the input 26 | string along with its position, if it's there. 27 | """ 28 | 29 | data = "foo" 30 | o = OMetaBase(data) 31 | v, e = o.rule_exactly("f") 32 | self.assertEqual(v, "f") 33 | self.assertEqual(e.args[0], 0) 34 | 35 | def test_exactly_multi(self): 36 | """ 37 | L{OMetaBase.rule_exactly} returns the requested item from the input 38 | string along with its position, if it's there. 
39 | """ 40 | 41 | data = "foo" 42 | o = OMetaBase(data) 43 | v, e = o.rule_exactly("fo") 44 | self.assertEqual(v, "fo") 45 | self.assertEqual(e.args[0], 0) 46 | 47 | def test_exactlyFail(self): 48 | """ 49 | L{OMetaBase.rule_exactly} raises L{ParseError} when the requested item 50 | doesn't match the input. The error contains info on what was expected 51 | and the position. 52 | """ 53 | 54 | data = "foo" 55 | o = OMetaBase(data) 56 | exc = self.assertRaises(ParseError, o.rule_exactly, "g") 57 | self.assertEquals(exc.args[1], expected(None, "g")) 58 | self.assertEquals(exc.args[0], 0) 59 | 60 | 61 | 62 | def test_token(self): 63 | """ 64 | L{OMetaBase.rule_token} matches all the characters in the given string 65 | plus any preceding whitespace. 66 | """ 67 | 68 | data = " foo bar" 69 | o = OMetaBase(data) 70 | v, e = o.rule_token("foo") 71 | self.assertEqual(v, "foo") 72 | self.assertEqual(e.args[0], 4) 73 | v, e = o.rule_token("bar") 74 | self.assertEqual(v, "bar") 75 | self.assertEqual(e.args[0], 8) 76 | 77 | 78 | def test_tokenFailed(self): 79 | """ 80 | On failure, L{OMetaBase.rule_token} produces an error indicating the 81 | position where match failure occurred and the expected character. 82 | """ 83 | data = "foozle" 84 | o = OMetaBase(data) 85 | exc = self.assertRaises(ParseError, o.rule_token, "fog") 86 | self.assertEqual(exc.args[0], 2) 87 | self.assertEqual(exc.args[1], expected("token", "fog")) 88 | 89 | 90 | def test_many(self): 91 | """ 92 | L{OMetaBase.many} returns a list of parsed values and the error that 93 | caused the end of the loop. 94 | """ 95 | 96 | data = "ooops" 97 | o = OMetaBase(data) 98 | self.assertEqual(o.many(lambda: o.rule_exactly('o')), 99 | (['o'] * 3, ParseError(o.input, 3, 100 | expected(None, 'o')))) 101 | 102 | 103 | def test_or(self): 104 | """ 105 | L{OMetaBase._or} returns the result of the first of its 106 | arguments to succeed. 
107 | """ 108 | 109 | data = "a" 110 | 111 | o = OMetaBase(data) 112 | called = [False, False, False] 113 | targets = ['b', 'a', 'c'] 114 | matchers = [] 115 | for i, m in enumerate(targets): 116 | def match(i=i, m=m): 117 | called[i] = True 118 | return o.exactly(m) 119 | matchers.append(match) 120 | 121 | v, e = o._or(matchers) 122 | self.assertEqual(called, [True, True, False]) 123 | self.assertEqual(v, 'a') 124 | self.assertEqual(e.args[0], 0) 125 | 126 | 127 | def test_orSimpleFailure(self): 128 | """ 129 | When none of the alternatives passed to L{OMetaBase._or} succeed, the 130 | one that got the furthest is returned. 131 | """ 132 | 133 | data = "foozle" 134 | o = OMetaBase(data) 135 | 136 | exc = self.assertRaises( 137 | ParseError, o._or, [ 138 | lambda: o.token("fog"), 139 | lambda: o.token("foozik"), 140 | lambda: o.token("woozle") 141 | ] 142 | ) 143 | self.assertEqual(exc.args[0], 4) 144 | self.assertEqual(exc.args[1], expected("token", "foozik")) 145 | 146 | 147 | def test_orFalseSuccess(self): 148 | """ 149 | When a failing branch of L{OMetaBase._or} gets further than a 150 | succeeding one, its error is returned instead of the success branch's. 151 | """ 152 | 153 | data = "foozle" 154 | o = OMetaBase(data) 155 | 156 | v, e = o._or( [lambda: o.token("fog"), 157 | lambda: o.token("foozik"), 158 | lambda: o.token("f")]) 159 | self.assertEqual(e.args[0], 4) 160 | self.assertEqual(e.args[1], expected("token", "foozik")) 161 | 162 | def test_orErrorTie(self): 163 | """ 164 | When branches of L{OMetaBase._or} produce errors that tie for rightmost 165 | position, they are merged. 
166 | """ 167 | 168 | data = "foozle" 169 | o = OMetaBase(data) 170 | 171 | v, e = o._or( [lambda: o.token("fog"), 172 | lambda: o.token("foz"), 173 | lambda: o.token("f")]) 174 | self.assertEqual(e.args[0], 2) 175 | self.assertEqual(set(e.args[1]), 176 | set([expected("token", "fog")[0], 177 | expected("token", "foz")[0]])) 178 | 179 | 180 | def test_notError(self): 181 | """ 182 | When L{OMetaBase._not} fails, its error contains the current 183 | input position and no error info. 184 | """ 185 | 186 | data = "xy" 187 | o = OMetaBase(data) 188 | exc = self.assertRaises(ParseError, o._not, lambda: o.exactly("x")) 189 | self.assertEqual(exc.args[0], 1) 190 | self.assertEqual(exc.args[1], None) 191 | 192 | 193 | def test_spaces(self): 194 | """ 195 | L{OMetaBase.rule_spaces} provides error information. 196 | """ 197 | 198 | data = " xyz" 199 | o = OMetaBase(data) 200 | v, e = o.rule_spaces() 201 | 202 | self.assertEqual(e.args[0], 2) 203 | 204 | def test_predSuccess(self): 205 | """ 206 | L{OMetaBase.pred} returns True and empty error info on success. 207 | """ 208 | 209 | o = OMetaBase("") 210 | v, e = o.pred(lambda: (True, ParseError(o.input, 0, None))) 211 | self.assertEqual((v, e), (True, ParseError(o.input, 0, None))) 212 | 213 | 214 | def test_predFailure(self): 215 | """ 216 | L{OMetaBase.pred} returns True and empty error info on success. 217 | """ 218 | 219 | o = OMetaBase("") 220 | exc = self.assertRaises( 221 | ParseError, o.pred, lambda: (False, ParseError(o.input, 0, None))) 222 | self.assertEqual(exc, ParseError(o.input, 0, None)) 223 | 224 | 225 | def test_end(self): 226 | """ 227 | L{OMetaBase.rule_end} matches the end of input and raises L{ParseError} 228 | if input is left. 
229 | """ 230 | o = OMetaBase("abc") 231 | exc = self.assertRaises(ParseError, o.rule_end) 232 | self.assertEqual(exc, ParseError(o.input, 1, None)) 233 | o.many(o.rule_anything) 234 | self.assertEqual(o.rule_end(), (True, ParseError("abc", 3, None))) 235 | 236 | def test_label(self): 237 | """ 238 | L{OMetaBase.label} returns a list of parsed values and the error that 239 | caused the end of the loop. 240 | """ 241 | 242 | data = "ooops" 243 | label = 'CustomLabel' 244 | o = OMetaBase(data) 245 | exc = self.assertRaises( 246 | ParseError, o.label, lambda: o.rule_exactly('x'), label) 247 | self.assertEqual(exc, 248 | ParseError(o.input, 0, expected(label)).withMessage([("Custom Exception:", label, None)])) 249 | 250 | def test_letter(self): 251 | """ 252 | L{OMetaBase.rule_letter} matches letters. 253 | """ 254 | o = OMetaBase("a1") 255 | v, e = o.rule_letter() 256 | self.assertEqual((v, e), ("a", ParseError(o.input, 0, None))) 257 | exc = self.assertRaises(ParseError, o.rule_letter) 258 | self.assertEqual(exc, ParseError(o.input, 1, 259 | expected("letter"))) 260 | 261 | 262 | def test_letterOrDigit(self): 263 | """ 264 | L{OMetaBase.rule_letterOrDigit} matches alphanumerics. 265 | """ 266 | o = OMetaBase("a1@") 267 | v, e = o.rule_letterOrDigit() 268 | self.assertEqual((v, e), ("a", ParseError(None, 0, None))) 269 | v, e = o.rule_letterOrDigit() 270 | self.assertEqual((v, e), ("1", ParseError(None, 1, None))) 271 | exc = self.assertRaises(ParseError, o.rule_letterOrDigit) 272 | self.assertEqual(exc, 273 | ParseError(o.input, 2, expected("letter or digit"))) 274 | 275 | 276 | def test_digit(self): 277 | """ 278 | L{OMetaBase.rule_digit} matches digits. 
279 | """ 280 | o = OMetaBase("1a") 281 | v, e = o.rule_digit() 282 | self.assertEqual((v, e), ("1", ParseError("1a", 0, None))) 283 | exc = self.assertRaises(ParseError, o.rule_digit) 284 | self.assertEqual(exc, ParseError(o.input, 1, expected("digit"))) 285 | 286 | 287 | 288 | def test_listpattern(self): 289 | """ 290 | L{OMetaBase.rule_listpattern} matches contents of lists. 291 | """ 292 | o = OMetaBase([["a"]], tree=True) 293 | v, e = o.listpattern(lambda: o.exactly("a")) 294 | self.assertEqual((v, e), (["a"], ParseError("a", 0, None))) 295 | -------------------------------------------------------------------------------- /ometa/test/test_tube.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, unicode_literals 2 | 3 | import unittest 4 | 5 | from ometa.grammar import OMeta 6 | from ometa.tube import TrampolinedParser 7 | 8 | 9 | def iterbytes(originalBytes): 10 | for i in range(len(originalBytes)): 11 | yield originalBytes[i:i+1] 12 | 13 | 14 | class TrampolinedReceiver(): 15 | """ 16 | Receive and store the passed in data. 17 | """ 18 | 19 | currentRule = 'initial' 20 | 21 | def __init__(self): 22 | self.received = [] 23 | 24 | def receive(self, data): 25 | self.received.append(data) 26 | 27 | 28 | class TrampolinedParserTestCase(unittest.TestCase): 29 | """ 30 | Tests for L{ometa.tube.TrampolinedParser} 31 | """ 32 | 33 | def _parseGrammar(self, grammar, name="Grammar"): 34 | return OMeta(grammar).parseGrammar(name) 35 | 36 | def setUp(self): 37 | _grammar = r""" 38 | delimiter = '\r\n' 39 | initial = <(~delimiter anything)*>:val delimiter -> receiver.receive(val) 40 | witharg :arg1 :arg2 = <(~delimiter anything)*>:a delimiter -> receiver.receive(arg1+arg2+a) 41 | """ 42 | self.grammar = self._parseGrammar(_grammar) 43 | 44 | def test_dataNotFullyReceived(self): 45 | """ 46 | Since the initial rule inside the grammar is not matched, the receiver 47 | shouldn't receive any byte. 
48 | """ 49 | receiver = TrampolinedReceiver() 50 | trampolinedParser = TrampolinedParser(self.grammar, receiver, {}) 51 | buf = 'foobarandnotreachdelimiter' 52 | for c in iterbytes(buf): 53 | trampolinedParser.receive(c) 54 | self.assertEqual(receiver.received, []) 55 | 56 | 57 | def test_dataFullyReceived(self): 58 | """ 59 | The receiver should receive the data according to the grammar. 60 | """ 61 | receiver = TrampolinedReceiver() 62 | trampolinedParser = TrampolinedParser(self.grammar, receiver, {}) 63 | buf = '\r\n'.join(('foo', 'bar', 'foo', 'bar')) 64 | for c in iterbytes(buf): 65 | trampolinedParser.receive(c) 66 | self.assertEqual(receiver.received, ['foo', 'bar', 'foo']) 67 | trampolinedParser.receive('\r\n') 68 | self.assertEqual(receiver.received, ['foo', 'bar', 'foo', 'bar']) 69 | 70 | 71 | def test_bindings(self): 72 | """ 73 | The passed-in bindings should be accessible inside the grammar. 74 | """ 75 | receiver = TrampolinedReceiver() 76 | grammar = r""" 77 | initial = digit:d (-> int(d)+SMALL_INT):val -> receiver.receive(val) 78 | """ 79 | bindings = {'SMALL_INT': 3} 80 | TrampolinedParser(self._parseGrammar(grammar), receiver, bindings).receive('0') 81 | self.assertEqual(receiver.received, [3]) 82 | 83 | 84 | def test_currentRuleWithArgs(self): 85 | """ 86 | TrampolinedParser should be able to invoke curruent rule with args. 
87 | """ 88 | receiver = TrampolinedReceiver() 89 | receiver.currentRule = "witharg", "nice ", "day" 90 | trampolinedParser = TrampolinedParser(self.grammar, receiver, {}) 91 | buf = ' oh yes\r\n' 92 | for c in iterbytes(buf): 93 | trampolinedParser.receive(c) 94 | self.assertEqual(receiver.received, ["nice day oh yes"]) 95 | -------------------------------------------------------------------------------- /ometa/test/test_vm_builder.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from terml.nodes import termMaker as t 3 | from ometa.vm_builder import writeBytecode, writeBytecodeRule, writeBytecodeGrammar 4 | 5 | 6 | class TestVMBuilder(TestCase): 7 | def test_exactly(self): 8 | x = t.Exactly("a") 9 | self.assertEqual(writeBytecode(x), 10 | [t.Match("a")]) 11 | 12 | def test_apply(self): 13 | one = t.Action("1") 14 | x = t.Action("x") 15 | a = t.Apply("foo", "main", [one, x]) 16 | self.assertEqual(writeBytecode(a), 17 | [t.Python('1'), 18 | t.Push(), 19 | t.Python('x'), 20 | t.Push(), 21 | t.Call('foo')]) 22 | 23 | def test_foreignApply(self): 24 | one = t.Action("1") 25 | x = t.Action("x") 26 | a = t.ForeignApply("thegrammar", "foo", "main", [one, x]) 27 | self.assertEqual(writeBytecode(a), 28 | [t.Python('1'), 29 | t.Push(), 30 | t.Python('x'), 31 | t.Push(), 32 | t.ForeignCall('thegrammar', 'foo')]) 33 | 34 | def test_superApply(self): 35 | one = t.Action("1") 36 | x = t.Action("x") 37 | a = t.Apply("super", "main", [one, x]) 38 | self.assertEqual(writeBytecode(a), 39 | [t.Python('1'), 40 | t.Push(), 41 | t.Python('x'), 42 | t.Push(), 43 | t.SuperCall('main')]) 44 | 45 | def test_many(self): 46 | xs = t.Many(t.Exactly("x")) 47 | self.assertEqual(writeBytecode(xs), 48 | [t.Choice(3), 49 | t.Match("x"), 50 | t.Commit(-2)]) 51 | # self.assertEqual(writeBytecode(xs), 52 | # [t.Choice(3), 53 | # t.Match("x"), 54 | # t.PartialCommit(0)]) 55 | 56 | def test_many1(self): 57 | xs = 
t.Many1(t.Exactly("x")) 58 | self.assertEqual(writeBytecode(xs), 59 | [t.Match('x'), 60 | t.Choice(3), 61 | t.Match('x'), 62 | t.Commit(-2)]) 63 | 64 | # self.assertEqual(writeBytecode(xs), 65 | # [t.Match('x'), 66 | # t.Choice(4), 67 | # t.Match('x'), 68 | # t.PartialCommit(1)]) 69 | 70 | def test_tripleOr(self): 71 | xy = t.Or([t.Exactly("x"), 72 | t.Exactly("y"), 73 | t.Exactly("z")]) 74 | self.assertEqual(writeBytecode(xy), 75 | [t.Choice(3), 76 | t.Match('x'), 77 | t.Commit(5), 78 | t.Choice(3), 79 | t.Match('y'), 80 | t.Commit(2), 81 | t.Match('z')]) 82 | 83 | def test_doubleOr(self): 84 | xy = t.Or([t.Exactly("x"), 85 | t.Exactly("y")]) 86 | self.assertEqual(writeBytecode(xy), 87 | [t.Choice(3), 88 | t.Match('x'), 89 | t.Commit(2), 90 | t.Match('y')]) 91 | 92 | def test_singleOr(self): 93 | x1 = t.Or([t.Exactly("x")]) 94 | x = t.Exactly("x") 95 | self.assertEqual(writeBytecode(x1), 96 | writeBytecode(x)) 97 | 98 | def test_optional(self): 99 | x = t.Optional(t.Exactly("x")) 100 | self.assertEqual(writeBytecode(x), 101 | [t.Choice(3), 102 | t.Match('x'), 103 | t.Commit(2), 104 | t.Python("None")]) 105 | 106 | def test_not(self): 107 | x = t.Not(t.Exactly("x")) 108 | self.assertEqual(writeBytecode(x), 109 | [t.Choice(4), 110 | t.Match('x'), 111 | t.Commit(1), 112 | t.Fail()]) 113 | 114 | # self.assertEqual(writeBytecode(x), 115 | # [t.Choice(3), 116 | # t.Match('x'), 117 | # t.FailTwice()]) 118 | 119 | def test_lookahead(self): 120 | x = t.Lookahead(t.Exactly("x")) 121 | self.assertEqual(writeBytecode(x), 122 | [t.Choice(7), 123 | t.Choice(4), 124 | t.Match('x'), 125 | t.Commit(1), 126 | t.Fail(), 127 | t.Commit(1), 128 | t.Fail()]) 129 | 130 | # self.assertEqual(writeBytecode(x), 131 | # [t.Choice(5), 132 | # t.Choice(2), 133 | # t.Match('x'), 134 | # t.Commit(1), 135 | # t.Fail()]) 136 | 137 | def test_sequence(self): 138 | x = t.Exactly("x") 139 | y = t.Exactly("y") 140 | z = t.And([x, y]) 141 | self.assertEqual(writeBytecode(z), 142 | [t.Match('x'), 143 | 
t.Match('y')]) 144 | 145 | def test_bind(self): 146 | x = t.Exactly("x") 147 | b = t.Bind("var", x) 148 | self.assertEqual(writeBytecode(b), 149 | [t.Match('x'), 150 | t.Bind('var')]) 151 | 152 | def test_bind_apply(self): 153 | x = t.Apply("members", "object", []) 154 | b = t.Bind("m", x) 155 | self.assertEqual(writeBytecode(b), 156 | [t.Call('members'), 157 | t.Bind('m')]) 158 | 159 | def test_pred(self): 160 | x = t.Predicate(t.Action("doStuff()")) 161 | self.assertEqual(writeBytecode(x), 162 | [t.Python('doStuff()'), 163 | t.Predicate()]) 164 | 165 | def test_listpattern(self): 166 | x = t.List(t.Exactly("x")) 167 | self.assertEqual(writeBytecode(x), 168 | [t.Descend(), 169 | t.Match('x'), 170 | t.Ascend()]) 171 | 172 | def test_rule(self): 173 | x = t.Rule("foo", t.Exactly("x")) 174 | k, v = writeBytecodeRule(x) 175 | self.assertEqual(k, "foo") 176 | self.assertEqual(v, [t.Match('x')]) 177 | 178 | def test_grammar(self): 179 | r1 = t.Rule("foo", t.Exactly("x")) 180 | r2 = t.Rule("baz", t.Exactly("y")) 181 | x = t.Grammar("BuilderTest", False, [r1, r2]) 182 | g = writeBytecodeGrammar(x) 183 | self.assertEqual(sorted(g.keys()), ['baz', 'foo']) 184 | self.assertEqual(g['foo'], [t.Match('x')]) 185 | self.assertEqual(g['baz'], [t.Match('y')]) 186 | 187 | def test_repeat(self): 188 | x = t.Repeat(3, 4, t.Exactly('x')) 189 | self.assertEqual(writeBytecode(x), 190 | [t.Python("3"), 191 | t.Push(), 192 | t.Python("4"), 193 | t.Push(), 194 | t.RepeatChoice(3), 195 | t.Match('x'), 196 | t.Commit(-2)]) 197 | 198 | def test_consumedby(self): 199 | x = t.ConsumedBy(t.Exactly('x')) 200 | self.assertEqual(writeBytecode(x), 201 | [t.StartSlice(), 202 | t.Match('x'), 203 | t.EndSlice()]) 204 | -------------------------------------------------------------------------------- /ometa/tube.py: -------------------------------------------------------------------------------- 1 | from ometa.interp import TrampolinedGrammarInterpreter, _feed_me 2 | 3 | class TrampolinedParser: 4 | """ 5 
| A parser that incrementally parses incoming data. 6 | """ 7 | def __init__(self, grammar, receiver, bindings): 8 | """ 9 | Initializes the parser. 10 | 11 | @param grammar: The grammar used to parse the incoming data. 12 | @param receiver: Responsible for logic operation on the parsed data. 13 | Typically, the logic operation will be invoked inside the grammar, 14 | e.g., rule = expr1 expr2 (-> receiver.doSomeStuff()) 15 | @param bindings: The namespace that can be accessed inside the grammar. 16 | """ 17 | self.grammar = grammar 18 | self.bindings = dict(bindings) 19 | self.bindings['receiver'] = self.receiver = receiver 20 | self._setupInterp() 21 | 22 | 23 | def _setupInterp(self): 24 | """ 25 | Resets the parser. The parser will begin parsing with the rule named 26 | 'initial'. 27 | """ 28 | self._interp = TrampolinedGrammarInterpreter( 29 | grammar=self.grammar, rule=self.receiver.currentRule, 30 | callback=None, globals=self.bindings) 31 | 32 | 33 | def receive(self, data): 34 | """ 35 | Receive the incoming data and begin parsing. The parser will parse the 36 | data incrementally according to the 'initial' rule in the grammar. 37 | 38 | @param data: The raw data received. 
39 | """ 40 | while data: 41 | status = self._interp.receive(data) 42 | if status is _feed_me: 43 | return 44 | data = ''.join(self._interp.input.data[self._interp.input.position:]) 45 | self._setupInterp() 46 | -------------------------------------------------------------------------------- /ometa/vm.parsley: -------------------------------------------------------------------------------- 1 | #TreeTransformer 2 | Exactly(:x) -> [t.Match(x)] 3 | Token(:x) -> [t.Match(x)] 4 | Many(@x) -> [t.Choice(len(x) + 2)] + x + [t.Commit(-len(x) - 1)] 5 | Many1(@x) -> x + [t.Choice(len(x) + 2)] + x + [t.Commit(-len(x) - 1)] 6 | Repeat(:min :max @x) 7 | -> [t.Python(repr(int(min))), t.Push(), t.Python(repr(int(max))), 8 | t.Push(), t.RepeatChoice(len(x) + 2)] + x + [t.Commit(-len(x) - 1)] 9 | Optional(@x) -> [t.Choice(len(x) + 2)] + x + [t.Commit(2), t.Python("None")] 10 | # Right-associate Or() as needed. Note that Or() can have a list of a single 11 | # element. 12 | Or(@xs) = ?(len(xs) == 1) transform(xs[0]) 13 | | ?(len(xs) == 2) transform(t.Or(xs[0], xs[1])) 14 | | transform(t.Or(xs[0], t.Or(xs[1:]))) 15 | Or(@left @right) 16 | -> [t.Choice(len(left) + 2)] + left + [t.Commit(len(right) + 1)] + right 17 | Not(@x) -> [t.Choice(len(x) + 3)] + x + [t.Commit(1), t.Fail()] 18 | Lookahead(:x) = transform(t.Not(t.Not(x))) 19 | And(@xs) -> sum(xs, []) 20 | Bind(:name @x) -> x + [t.Bind(name)] 21 | Predicate(@x) -> x + [t.Predicate()] 22 | Action(:x) -> [t.Python(x.data)] 23 | Python(:x) -> [t.Python(x.data)] 24 | List(@x) -> [t.Descend()] + x + [t.Ascend()] 25 | ConsumedBy(@x) -> [t.StartSlice()] + x + [t.EndSlice()] 26 | 27 | pushes :xs -> [inner for x in xs for inner in [x[0], t.Push()]] 28 | Apply("super" :code @args) pushes(args):xs -> xs + [t.SuperCall(code)] 29 | Apply(:rule :code @args) pushes(args):xs -> xs + [t.Call(rule)] 30 | ForeignApply(:grammar :rule :code @args) pushes(args):xs -> (xs + 31 | [t.ForeignCall(grammar, rule)]) 32 | 33 | Rule(:name @xs) -> t.Rule(name, xs) 
34 | Grammar(:name :tree @rules) -> t.Grammar(name, tree, rules) 35 | -------------------------------------------------------------------------------- /ometa/vm_builder.py: -------------------------------------------------------------------------------- 1 | import os 2 | try: 3 | from StringIO import StringIO 4 | except ImportError: 5 | from io import StringIO 6 | 7 | from terml.nodes import Term, coerceToTerm, termMaker as t 8 | 9 | HERE = os.path.dirname(__file__) 10 | 11 | def writeBytecode(expr): 12 | print("Gonna compile %s" % (expr,)) 13 | from ometa.grammar import TreeTransformerGrammar 14 | from ometa.runtime import TreeTransformerBase 15 | path = os.path.join(HERE, 'vm.parsley') 16 | Compiler = TreeTransformerGrammar.makeGrammar(open(path).read(), 17 | "Compiler").createParserClass(TreeTransformerBase, {"t": t}) 18 | return Compiler.transform(expr)[0] 19 | 20 | 21 | def bytecodeToPython(expr): 22 | print("Gonna emit %s" % (expr,)) 23 | from ometa.grammar import TreeTransformerGrammar 24 | from ometa.runtime import TreeTransformerBase 25 | Emitter = TreeTransformerGrammar.makeGrammar(open("ometa/vm_emit.parsley").read(), 26 | "Emitter").createParserClass(TreeTransformerBase, {"t": t}) 27 | return Emitter.transform(expr)[0] 28 | 29 | 30 | def writeBytecodeRule(expr): 31 | e = GrammarEmitter() 32 | p = PythonWriter(expr) 33 | p.output(e) 34 | return list(e.rules.items())[0] 35 | 36 | 37 | def writeBytecodeGrammar(expr): 38 | e = GrammarEmitter() 39 | p = PythonWriter(expr) 40 | p.output(e) 41 | return e.rules 42 | 43 | 44 | class GrammarEmitter(object): 45 | def __init__(self): 46 | self.rules = {} 47 | self.tree = False 48 | 49 | def emitterForRule(self, name): 50 | e = Emitter() 51 | self.rules[name] = e.instrs 52 | return e 53 | 54 | 55 | class Emitter(object): 56 | 57 | def __init__(self): 58 | self.instrs = [] 59 | 60 | def emit(self, i, label=None): 61 | self.instrs.append(i) 62 | if label is not None: 63 | self.backpatch(label, len(self.instrs) - 1) 64 
| return len(self.instrs) - 1 65 | 66 | def backpatch(self, fromIdx, toIdx): 67 | old = self.instrs[fromIdx] 68 | self.instrs[fromIdx] = Term(old.tag, None, [coerceToTerm(toIdx)]) 69 | 70 | def patchNext(self, target): 71 | self.backpatch(target, len(self.instrs)) 72 | 73 | 74 | class PythonWriter(object): 75 | """ 76 | Converts an OMeta syntax tree into Python source. 77 | """ 78 | def __init__(self, tree): 79 | self.tree = tree 80 | 81 | def output(self, out): 82 | self._generateNode(out, self.tree) 83 | 84 | def _generateNode(self, out, node, debugname=None): 85 | name = node.tag.name 86 | args = node.args 87 | if node.data is not None: 88 | out.emit(t.Literal(node.data)) 89 | return 90 | if name == 'null': 91 | out.emit(t.Python("None")) 92 | return getattr(self, "generate_"+name)(out, *args, debugname=debugname) 93 | 94 | def generate_Rule(self, out, name, expr, debugname=None): 95 | e = out.emitterForRule(name.data) 96 | self._generateNode(e, expr, name.data) 97 | 98 | def generate_Grammar(self, out, name, takesTreeInput, rules, 99 | debugname=None): 100 | for rule in rules.args: 101 | self._generateNode(out, rule, debugname) 102 | out.tree = takesTreeInput 103 | 104 | def generate_Apply(self, out, ruleName, codeName, rawArgs, debugname=None): 105 | for arg in rawArgs.args: 106 | self._generateNode(out, arg, debugname) 107 | out.emit(t.Push()) 108 | if ruleName.data == "super": 109 | out.emit(t.SuperCall(codeName)) 110 | else: 111 | out.emit(t.Call(ruleName)) 112 | 113 | def generate_ForeignApply(self, out, grammarName, ruleName, codeName, 114 | rawArgs, debugname=None): 115 | for arg in rawArgs.args: 116 | self._generateNode(out, arg, debugname) 117 | out.emit(t.Push()) 118 | out.emit(t.ForeignCall(grammarName, ruleName)) 119 | 120 | def generate_Exactly(self, out, literal, debugname=None): 121 | out.emit(t.Match(literal.data)) 122 | 123 | def generate_Token(self, out, literal, debugname=None): 124 | self.generate_Exactly(out, literal) 125 | 126 | def 
generate_Many(self, out, expr, debugname=None): 127 | L = out.emit(t.Choice()) 128 | self._generateNode(out, expr, debugname) 129 | L2 = out.emit(t.Commit()) 130 | out.patchNext(L) 131 | out.backpatch(L2, L) 132 | 133 | def generate_Many1(self, out, expr, debugname=None): 134 | self._generateNode(out, expr, debugname) 135 | self.generate_Many(out, expr, debugname) 136 | 137 | def generate_Repeat(self, out, min, max, expr, debugname=None): 138 | out.emit(t.Python(str(min.data))) 139 | out.emit(t.Push()) 140 | out.emit(t.Python(str(max.data))) 141 | out.emit(t.Push()) 142 | L = out.emit(t.RepeatChoice()) 143 | self._generateNode(out, expr, debugname) 144 | L2 = out.emit(t.Commit()) 145 | out.patchNext(L) 146 | out.backpatch(L2, L) 147 | 148 | def generate_Optional(self, out, expr, debugname=None): 149 | """ 150 | Try to parse an expr and continue if it fails. 151 | """ 152 | L = out.emit(t.Choice()) 153 | self._generateNode(out, expr, debugname) 154 | L2 = out.emit(t.Commit()) 155 | out.emit(t.Python("None"), label=L) 156 | out.patchNext(L2) 157 | 158 | def generate_Or(self, out, exprs, debugname=None): 159 | if len(exprs.args) == 1: 160 | self._generateNode(out, exprs.args[0]) 161 | return 162 | L = None 163 | lcs = [] 164 | for ex in exprs.args[:-1]: 165 | L = out.emit(t.Choice(), label=L) 166 | self._generateNode(out, ex) 167 | lcs.append(out.emit(t.Commit())) 168 | out.patchNext(L) 169 | self._generateNode(out, exprs.args[-1]) 170 | for LC in lcs: 171 | out.patchNext(LC) 172 | 173 | def generate_Not(self, out, expr, debugname=None): 174 | L1 = out.emit(t.Choice()) 175 | self._generateNode(out, expr) 176 | L2 = out.emit(t.Commit()) 177 | out.emit(t.Fail(), label=L1) 178 | out.patchNext(L2) 179 | 180 | def generate_Lookahead(self, out, expr, debugname=None): 181 | L1 = out.emit(t.Choice()) 182 | L2 = out.emit(t.Choice()) 183 | self._generateNode(out, expr) 184 | L3 = out.emit(t.Commit(), label=L2) 185 | out.emit(t.Fail(), label=L3) 186 | out.patchNext(L1) 187 | 188 
| def generate_And(self, out, exprs, debugname=None): 189 | for ex in exprs.args: 190 | self._generateNode(out, ex) 191 | 192 | def generate_Bind(self, out, name, expr, debugname=None): 193 | self._generateNode(out, expr) 194 | out.emit(t.Bind(name)) 195 | 196 | def generate_Predicate(self, out, expr, debugname=None): 197 | self._generateNode(out, expr) 198 | out.emit(t.Predicate()) 199 | 200 | def generate_Action(self, out, expr, debugname=None): 201 | out.emit(t.Python(expr.data)) 202 | 203 | def generate_Python(self, out, expr, debugname=None): 204 | out.emit(t.Python(expr.data)) 205 | 206 | def generate_List(self, out, expr, debugname=None): 207 | out.emit(t.Descend()) 208 | self._generateNode(out, expr) 209 | out.emit(t.Ascend()) 210 | 211 | def generate_TermPattern(self, out, name, expr, debugname=None): 212 | raise NotImplementedError() 213 | 214 | def generate_StringTemplate(self, out, template, debugname=None): 215 | raise NotImplementedError() 216 | 217 | def generate_ConsumedBy(self, out, expr, debugname=None): 218 | out.emit(t.StartSlice()) 219 | self._generateNode(out, expr, debugname) 220 | out.emit(t.EndSlice()) 221 | -------------------------------------------------------------------------------- /ometa/vm_emit.parsley: -------------------------------------------------------------------------------- 1 | #TreeTransformer 2 | Grammar(str:name str:tree @rules) {{{ 3 | class $name: 4 | tree = $tree 5 | $rules 6 | }}} 7 | Rule(str:name @rules) {{{ 8 | $name = [ 9 | $rules 10 | ] 11 | }}} 12 | 13 | Ascend() --> t.Ascend(), 14 | Bind(str:x) --> t.Bind($x), 15 | Call(str:x) --> t.Call($x), 16 | Choice(str:x) --> t.Choice($x), 17 | Commit(str:x) --> t.Commit($x), 18 | Descend() --> t.Descend(), 19 | EndSlice() --> t.EndSlice(), 20 | Fail() --> t.Fail(), 21 | ForeignCall(str:x str:y) --> t.ForeignCall($x, $y), 22 | Match(str:x) --> t.Match($x), 23 | Predicate() --> t.Predicate(), 24 | Push() --> t.Push(), 25 | Python(str:x) --> t.Python($x), 26 | StartSlice() 
--> t.StartSlice(), 27 | SuperCall(str:x) --> t.SuperCall($x), 28 | 29 | str = anything:s -> str(s.data) 30 | -------------------------------------------------------------------------------- /parsley.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | from ometa.grammar import OMeta 4 | from ometa.runtime import ParseError, EOFError, OMetaBase 5 | from terml.parser import parseTerm as term 6 | from terml.quasiterm import quasiterm 7 | 8 | __version__ = '1.3' 9 | 10 | 11 | def wrapGrammar(g, tracefunc=None): 12 | def makeParser(input): 13 | """ 14 | Creates a parser for the given input, with methods for 15 | invoking each rule. 16 | 17 | :param input: The string you want to parse. 18 | """ 19 | parser = g(input) 20 | if tracefunc: 21 | parser._trace = tracefunc 22 | return _GrammarWrapper(parser, input) 23 | makeParser._grammarClass = g 24 | return makeParser 25 | 26 | 27 | def makeGrammar(source, bindings, name='Grammar', unwrap=False, 28 | extends=wrapGrammar(OMetaBase), tracefunc=None): 29 | """ 30 | Create a class from a Parsley grammar. 31 | 32 | :param source: A grammar, as a string. 33 | :param bindings: A mapping of variable names to objects. 34 | :param name: Name used for the generated class. 35 | 36 | :param unwrap: If True, return a parser class suitable for 37 | subclassing. If False, return a wrapper with the 38 | friendly API. 39 | :param extends: The superclass for the generated parser class. 40 | 41 | :param tracefunc: A 3-arg function which takes a fragment of grammar 42 | source, the start/end indexes in the grammar of this 43 | fragment, and a position in the input. Invoked for 44 | terminals and rule applications. 
45 | """ 46 | g = OMeta.makeGrammar(source, name).createParserClass( 47 | unwrapGrammar(extends), bindings) 48 | if unwrap: 49 | return g 50 | else: 51 | return wrapGrammar(g, tracefunc=tracefunc) 52 | 53 | 54 | def unwrapGrammar(w): 55 | """ 56 | Access the internal parser class for a Parsley grammar object. 57 | """ 58 | return getattr(w, '_grammarClass', None) or w 59 | 60 | 61 | class _GrammarWrapper(object): 62 | """ 63 | A wrapper for Parsley grammar instances. 64 | 65 | To invoke a Parsley rule, invoke a method with that name -- this 66 | turns x(input).foo() calls into grammar.apply("foo") calls. 67 | """ 68 | def __init__(self, grammar, input): 69 | self._grammar = grammar 70 | self._input = input 71 | #so pydoc doesn't get trapped in the __getattr__ 72 | self.__name__ = _GrammarWrapper.__name__ 73 | 74 | def __getattr__(self, name): 75 | """ 76 | Return a function that will instantiate a grammar and invoke the named 77 | rule. 78 | :param name: Rule name. 79 | """ 80 | def invokeRule(*args, **kwargs): 81 | """ 82 | Invoke a Parsley rule. Passes any positional args to the rule. 83 | """ 84 | try: 85 | ret, err = self._grammar.apply(name, *args) 86 | except ParseError as e: 87 | self._grammar.considerError(e) 88 | err = self._grammar.currentError 89 | else: 90 | try: 91 | extra, _ = self._grammar.input.head() 92 | except EOFError: 93 | return ret 94 | else: 95 | # problem is that input remains, so: 96 | err = ParseError(err.input, err.position + 1, 97 | [["message", "expected EOF"]], err.trail) 98 | raise err 99 | return invokeRule 100 | 101 | 102 | def makeProtocol(source, senderFactory, receiverFactory, bindings=None, 103 | name='Grammar'): 104 | """ 105 | Create a Twisted ``Protocol`` factory from a Parsley grammar. 106 | 107 | :param source: A grammar, as a string. 108 | :param senderFactory: A one-argument callable that takes a twisted 109 | ``Transport`` and returns a :ref:`sender `. 
def stack(*wrappers):
    """
    Stack some senders or receivers for ease of wrapping.

    ``stack(x, y, z)`` will return a factory usable as a sender or receiver
    factory which will, when called with a transport or sender as an argument,
    return ``x(y(z(argument)))``.
    """
    if not wrappers:
        raise TypeError('at least one argument is required')

    def factory(arg):
        # Apply the innermost (last) wrapper first, then work outward.
        result = arg
        for wrapper in reversed(wrappers):
            result = wrapper(result)
        return result
    return factory
5 | """ 6 | 7 | from distutils.core import setup 8 | setup( 9 | name="Parsley", 10 | version="1.3", 11 | url="http://launchpad.net/parsley", 12 | description="Parsing and pattern matching made easy.", 13 | author="Allen Short", 14 | author_email="washort42@gmail.com", 15 | license="MIT License", 16 | long_description=open("README").read(), 17 | packages=["ometa", "terml", "ometa._generated", "terml._generated", 18 | "ometa.test", "terml.test"], 19 | py_modules=["parsley"] 20 | ) 21 | -------------------------------------------------------------------------------- /terml/README.txt: -------------------------------------------------------------------------------- 1 | TermL is JSON's big brother. 2 | It's described here: http://www.erights.org/data/terml/terml-spec.html 3 | 4 | In addition to JSON's dict, list, string, and number types, TermL 5 | supports arbitrary identifiers as tags, with optional parenthesized 6 | arguments. It's a nice representation for ASTs and the like, where you 7 | have a tree of things with a relatively small set of names. 
8 | 9 | To use this code, do something like this: 10 | 11 | >>> from terml.parser import parseTerm 12 | >>> parseTerm('[foo(x), 3, FancyObject("bits", "bobs")]') 13 | Term('[foo(x), 3, FancyObject("bits", "bobs")]') 14 | 15 | >>> t = parseTerm('[foo(x), 3, FancyObject("bits", "bobs")]') 16 | 17 | >>> t.arglist 18 | [Term('foo(x)'), Term('3'), Term('FancyObject("bits", "bobs")')] 19 | 20 | >>> t.functor 21 | Tag('.tuple.') 22 | 23 | >>> t.arglist[0] 24 | Term('foo(x)') 25 | 26 | >>> t.arglist[0].functor 27 | Tag('foo') 28 | 29 | 30 | >>> t2 = parseTerm('{foo: 1, "foo": 11, f(o(o, 1): 1}') 31 | 32 | {foo: 1, "foo": 11, f(o(o, 1): 1} 33 | ^ 34 | Parse error at line 1, column 21: expected the token '}' 35 | 36 | Traceback (most recent call last): 37 | File "", line 1, in 38 | File "terml/parser.py", line 202, in parseTerm 39 | return _parseTerm(termString) 40 | File "terml/parser.py", line 186, in _parseTerm 41 | result, error = p.apply("term") 42 | File "/Users/washort/Projects/PyMeta/trunk/pymeta/runtime.py", line 278, in apply 43 | return self._apply(r, ruleName, args) 44 | File "/Users/washort/Projects/PyMeta/trunk/pymeta/runtime.py", line 307, in _apply 45 | [rule(), self.input]) 46 | File "/pymeta_generated_code/pymeta_grammar__TermLParser.py", line 483, in rule_term 47 | File "/Users/washort/Projects/PyMeta/trunk/pymeta/runtime.py", line 397, in _or 48 | raise joinErrors(errors) 49 | pymeta.runtime.ParseError: (21, [('expected', 'token', "'}'")]) 50 | 51 | >>> terml.parser.parseTerm("foo(())") 52 | 53 | foo(()) 54 | ^ 55 | Parse error at line 1, column 4: expected one of ')', token '[', token '"', token "'", '0', a digit, a letter, '_', '$', '.', '<', ':', token '${', token '$', token '@{', token '@', token '{', '-', ' ', '\t', '\x0c', or '#' 56 | 57 | Traceback (most recent call last): 58 | File "", line 1, in 59 | File "terml/parser.py", line 202, in parseTerm 60 | return _parseTerm(termString) 61 | File "terml/parser.py", line 192, in _parseTerm 62 | raise 
error 63 | pymeta.runtime.ParseError: (4, [('expected', None, ')'), ('expected', 'token', '['), ('expected', 'token', '"'), ('expected', 'token', "'"), ('expected', None, '0'), ('expected', 'digit', None), ('expected', 'letter', None), ('expected', None, '_'), ('expected', None, '$'), ('expected', None, '.'), ('expected', None, '<'), ('expected', None, ':'), ('expected', 'token', '${'), ('expected', 'token', '$'), ('expected', 'token', '@{'), ('expected', 'token', '@'), ('expected', 'token', '{'), ('expected', None, '-'), ('expected', None, ' '), ('expected', None, '\t'), ('expected', None, '\x0c'), ('expected', None, '#')]) 64 | -------------------------------------------------------------------------------- /terml/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyga/parsley/c89b3f0e09a9501f285a14ae446a77a56ee99942/terml/__init__.py -------------------------------------------------------------------------------- /terml/_generated/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyga/parsley/c89b3f0e09a9501f285a14ae446a77a56ee99942/terml/_generated/__init__.py -------------------------------------------------------------------------------- /terml/nodes.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | import sys 3 | 4 | try: 5 | basestring 6 | scalar_types = (str, unicode, int, long, float) 7 | integer_types = (int, long) 8 | except NameError: 9 | basestring = str 10 | scalar_types = (str, int, float) 11 | integer_types = (int,) 12 | 13 | _Term = namedtuple("Term", "tag data args span") 14 | class Term(_Term): 15 | def __new__(cls, tag, data, args, span): 16 | #XXX AstroTag tracks (name, tag_code) and source span 17 | if data and not isinstance(data, scalar_types): 18 | raise ValueError("Term data can't be of type %r" % 
(type(data),)) 19 | if data and args: 20 | raise ValueError("Term %s can't have both data and children" % (tag,)) 21 | if args is None: 22 | args = () 23 | return _Term.__new__(cls, tag, data, tuple(args), span) 24 | 25 | def __iter__(self): 26 | #and now I feel a bit silly subclassing namedtuple 27 | raise NotImplementedError() 28 | 29 | def __eq__(self, other): 30 | try: 31 | if self.tag.name == ".bag." and other.tag.name == ".bag.": 32 | return (self.data, set(self.args) 33 | ) == (other.data, set(other.args)) 34 | return ( self.tag, self.data, self.args 35 | ) == (other.tag, other.data, other.args) 36 | except AttributeError: 37 | return False 38 | 39 | 40 | def __hash__(self): 41 | return hash((Term, self.tag, self.data, self.args)) 42 | 43 | 44 | def __repr__(self): 45 | return "term('%s')" % (self._unparse(4).replace("'", "\\'")) 46 | 47 | 48 | def _unparse(self, indentLevel=0): 49 | newlineAndIndent = '\n' + (' ' * indentLevel) 50 | if self.data is not None: 51 | if self.tag.name == '.String.': 52 | return '"%s"' % repr(self.data)[1:-1].replace("\\'", "'").replace('"', '\\\\"') 53 | elif self.tag.name == '.char.': 54 | return "'%s'" % repr(self.data)[1:-1].replace("'", "\\'").replace('\\"', '"') 55 | else: 56 | return str(self.data) 57 | args = ', '.join([a._unparse() for a in self.args]) 58 | if self.tag.name == '.tuple.': 59 | return "[%s]" % (args,) 60 | elif self.tag.name == '.attr.': 61 | return "%s: %s" % (self.args[0]._unparse(indentLevel), 62 | self.args[1]._unparse(indentLevel)) 63 | elif self.tag.name == '.bag.': 64 | return "{%s}" % (args,) 65 | elif len(self.args) == 1 and self.args[0].tag.name == '.bag.': 66 | return "%s%s" % (self.tag._unparse(indentLevel), args) 67 | else: 68 | if len(self.args) == 0: 69 | return self.tag._unparse(indentLevel) 70 | return "%s(%s)" % (self.tag._unparse(indentLevel), args) 71 | 72 | def withSpan(self, span): 73 | return Term(self.tag, self.data, self.args, span) 74 | 75 | 76 | def build(self, builder): 77 | if 
class Tag(object):
    """
    A term functor name, e.g. ``foo`` in ``foo(x, y)``.
    """
    def __init__(self, name):
        """
        :param name: The tag's name; must be a non-empty string.
        :raises ValueError: if ``name`` is empty.
        """
        # Bug fix: the original check ``name[0] == ''`` could never be
        # true (a one-character string never equals ''), and empty
        # input raised IndexError instead of the intended ValueError.
        if not name:
            raise ValueError("Tags must have names")
        self.name = name

    def __eq__(self, other):
        # Tags compare equal only to other Tags of the exact same
        # class with the same name.
        return other.__class__ == self.__class__ and self.name == other.name

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return "Tag(%r)" % (self.name,)

    def __hash__(self):
        # Include the class itself so a Tag doesn't hash-collide with
        # other objects keyed by the same name tuple.
        return hash((Tag, self.name))

    def _unparse(self, indentLevel=0):
        # A tag unparses as its bare name regardless of indent level.
        return self.name
def makeHex(sign, hs):
    """
    Build an integer from hexadecimal digit characters.

    :param sign: '-' for negative literals, or None/'' for positive.
    :param hs: An iterable of hex digit characters.
    """
    digits = ''.join(hs)
    prefix = sign if sign else ''
    return int(prefix + digits, 16)
def numberType(n):
    """
    Return the TermL tag name for a numeric literal.

    :param n: A float or integer value.
    :returns: ``".float64."`` for floats, ``".int."`` for integers.
    :raises ValueError: if ``n`` is neither.
    """
    if isinstance(n, float):
        return ".float64."
    elif isinstance(n, integer_types):
        return ".int."
    # Replaces the original unhelpful "wtf" message with a diagnostic
    # that names the offending value.
    raise ValueError("%r is not a recognized numeric type" % (n,))
100 | """ 101 | p = TermLParser(termString) 102 | result, error = p.apply("term") 103 | try: 104 | p.input.head() 105 | except EOFError: 106 | pass 107 | else: 108 | raise error 109 | return result 110 | -------------------------------------------------------------------------------- /terml/qnodes.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from collections import namedtuple 3 | from terml.nodes import Term, Tag, coerceToTerm 4 | 5 | try: 6 | basestring 7 | except NameError: 8 | basestring = str 9 | 10 | 11 | class QTerm(namedtuple("QTerm", "functor data args span")): 12 | """ 13 | A quasiterm, representing a template or pattern for a term tree. 14 | """ 15 | @property 16 | def tag(self): 17 | return self.functor.tag 18 | 19 | def _substitute(self, map): 20 | candidate = self.functor._substitute(map)[0] 21 | args = tuple(itertools.chain.from_iterable(a._substitute(map) for a in self.args)) 22 | term = Term(candidate.tag, candidate.data, args, self.span) 23 | return [term] 24 | 25 | def substitute(self, map): 26 | """ 27 | Fill $-holes with named values. 28 | 29 | @param map: A mapping of names to values to be inserted into 30 | the term tree. 31 | """ 32 | return self._substitute(map)[0] 33 | 34 | def match(self, specimen, substitutionArgs=()): 35 | """ 36 | Search a term tree for matches to this pattern. Returns a 37 | mapping of names to matched values. 38 | 39 | @param specimen: A term tree to extract values from. 
40 | """ 41 | bindings = {} 42 | if self._match(substitutionArgs, [specimen], bindings, (), 1) == 1: 43 | return bindings 44 | raise TypeError("%r doesn't match %r" % (self, specimen)) 45 | 46 | def _reserve(self): 47 | return 1 48 | 49 | def _match(self, args, specimens, bindings, index, max): 50 | if not specimens: 51 | return -1 52 | spec = self._coerce(specimens[0]) 53 | if spec is None: 54 | return -1 55 | matches = self.functor._match(args, [spec.withoutArgs()], bindings, index, 1) 56 | if not matches: 57 | return -1 58 | if matches > 1: 59 | raise TypeError("Functor may only match 0 or 1 specimen") 60 | num = matchArgs(self.args, spec.args, args, bindings, index, len(spec.args)) 61 | if len(spec.args) == num: 62 | if max >= 1: 63 | return 1 64 | return -1 65 | 66 | def _coerce(self, spec): 67 | if isinstance(spec, Term): 68 | newf = coerceToQuasiMatch(spec.withoutArgs(), 69 | self.functor.isFunctorHole, 70 | self.tag) 71 | if newf is None: 72 | return None 73 | return Term(newf.asFunctor(), None, spec.args, None) 74 | else: 75 | return coerceToQuasiMatch(spec, self.functor.isFunctorHole, 76 | self.tag) 77 | 78 | def __eq__(self, other): 79 | return ( self.functor, self.data, self.args 80 | ) == (other.functor, other.data, other.args) 81 | 82 | def asFunctor(self): 83 | if self.args: 84 | raise ValueError("Terms with args can't be used as functors") 85 | else: 86 | return self.functor 87 | 88 | class QFunctor(namedtuple("QFunctor", "tag data span")): 89 | isFunctorHole = False 90 | def _reserve(self): 91 | return 1 92 | 93 | @property 94 | def name(self): 95 | return self.tag.name 96 | 97 | def _unparse(self, indentLevel=0): 98 | return self.tag._unparse(indentLevel) 99 | 100 | def _substitute(self, map): 101 | return [Term(self.tag, self.data, None, self.span)] 102 | 103 | def _match(self, args, specimens, bindings, index, max): 104 | if not specimens: 105 | return -1 106 | spec = coerceToQuasiMatch(specimens[0], False, self.tag) 107 | if spec is None: 108 | 
return -1 109 | if self.data is not None and self.data != spec.data: 110 | return -1 111 | if max >= 1: 112 | return 1 113 | return -1 114 | 115 | def asFunctor(self): 116 | return self 117 | 118 | def matchArgs(quasiArglist, specimenArglist, args, bindings, index, max): 119 | specs = specimenArglist 120 | reserves = [q._reserve() for q in quasiArglist] 121 | numConsumed = 0 122 | for i, qarg in enumerate(quasiArglist): 123 | num = qarg._match(args, specs, bindings, index, max - sum(reserves[i + 1:])) 124 | if num == -1: 125 | return -1 126 | specs = specs[num:] 127 | max -= num 128 | numConsumed += num 129 | return numConsumed 130 | 131 | 132 | def coerceToQuasiMatch(val, isFunctorHole, tag): 133 | if isFunctorHole: 134 | if val is None: 135 | result = Term(Tag("null"), None, None, None) 136 | elif isinstance(val, Term): 137 | if len(val.args) != 0: 138 | return None 139 | else: 140 | result = val 141 | elif isinstance(val, basestring): 142 | result = Term(Tag(val), None, None, None) 143 | elif isinstance(val, bool): 144 | result = Term(Tag(["false", "true"][val]), None, None, None) 145 | else: 146 | return None 147 | else: 148 | result = coerceToTerm(val) 149 | if tag is not None and result.tag != tag: 150 | return None 151 | return result 152 | 153 | class _Hole(namedtuple("_Hole", "tag name isFunctorHole")): 154 | def _reserve(self): 155 | return 1 156 | 157 | def __repr__(self): 158 | return "term('%s')" % (self._unparse(4).replace("'", "\\'")) 159 | 160 | def match(self, specimen, substitutionArgs=()): 161 | bindings = {} 162 | if self._match(substitutionArgs, [specimen], bindings, (), 1) != -1: 163 | return bindings 164 | raise TypeError("%r doesn't match %r" % (self, specimen)) 165 | 166 | 167 | def _multiget(args, holenum, index, repeat): 168 | result = args[holenum] 169 | for i in index: 170 | if not isinstance(result, list): 171 | return result 172 | result = result[i] 173 | return result 174 | 175 | def _multiput(bindings, holenum, index, newval): 176 | 
bits = bindings 177 | dest = holenum 178 | for it in index: 179 | next = bits[dest] 180 | if next is None: 181 | next = {} 182 | bits[dest] = next 183 | bits = next 184 | dest = it 185 | result = None 186 | if dest in bits: 187 | result = bits[dest] 188 | bits[dest] = newval 189 | return result 190 | 191 | class ValueHole(_Hole): 192 | def _unparse(self, indentLevel=0): 193 | return "${%s}" % (self.name,) 194 | 195 | def _substitute(self, map): 196 | termoid = map[self.name] 197 | val = coerceToQuasiMatch(termoid, self.isFunctorHole, self.tag) 198 | if val is None: 199 | raise TypeError("%r doesn't match %r" % (termoid, self)) 200 | return [val] 201 | 202 | def asFunctor(self): 203 | if self.isFunctorHole: 204 | return self 205 | else: 206 | return ValueHole(self.tag, self.name, True) 207 | 208 | 209 | class PatternHole(_Hole): 210 | 211 | def _unparse(self, indentLevel=0): 212 | if self.tag: 213 | return "%s@{%s}" % (self.tag.name, self.name) 214 | else: 215 | return "@{%s}" % (self.name,) 216 | 217 | def _match(self, args, specimens, bindings, index, max): 218 | if not specimens: 219 | return -1 220 | spec = coerceToQuasiMatch(specimens[0], self.isFunctorHole, self.tag) 221 | if spec is None: 222 | return -1 223 | oldval = _multiput(bindings, self.name, index, spec) 224 | if oldval is None or oldval != spec: 225 | if max >= 1: 226 | return 1 227 | return -1 228 | 229 | 230 | def asFunctor(self): 231 | if self.isFunctorHole: 232 | return self 233 | else: 234 | return PatternHole(self.tag, self.name, True) 235 | 236 | class QSome(namedtuple("_QSome", "value quant")): 237 | def _reserve(self): 238 | if self.quant == "+": 239 | return 1 240 | else: 241 | return 0 242 | -------------------------------------------------------------------------------- /terml/quasiterm.parsley: -------------------------------------------------------------------------------- 1 | schema = production+:ps -> schema(ps) 2 | production = tag:t ws '::=' argList:a ws ';' -> production(t, a) 3 | 
4 | functor = (spaces ( (functorHole functorHole !(reserved("hole-tagged-hole"))) 5 | | ('.'? functorHole) 6 | | (tag:t functorHole:h) -> taggedHole(t, h))) 7 | | super 8 | 9 | arg = interleave:l (ws '|' interleave)*:r -> _or(l, *r) 10 | interleave = action:l (ws '&' action)*:r -> interleave(l, *r) 11 | action = pred:l (ws '->' pred:r -> action(l, *r) 12 | | -> l) 13 | pred = some | (ws '!' some:x -> not(x)) 14 | some = (quant:q -> some(None, q) 15 | | ( prim:l ( (ws '**' prim:r -> matchSeparatedSequence(l, r)) 16 | | (ws '++' prim:r -> matchSeparatedSequence1(l, r)) 17 | )?:seq 18 | quant?:q -> some(seq or l, q))) 19 | quant = ws ('?' |'+' | '*') 20 | prim = term 21 | | ('.' -> any()) 22 | | (literal:l ws '..' literal:r -> range(l, r)) 23 | | ws '^' string:s -> anyOf(s) 24 | | ws '(' argList:l ws ')' -> l 25 | 26 | simpleint = decdigits:ds -> int(ds) 27 | functorHole = '$' (simpleint:i | '{' simpleint:i '}' | (tag:t -> t.name):i) -> dollarHole(i) 28 | |('@' | '=') (simpleint:i | '{' simpleint:i '}' | (tag:t -> t.name):i) -> patternHole(i) 29 | -------------------------------------------------------------------------------- /terml/quasiterm.py: -------------------------------------------------------------------------------- 1 | from ometa.grammar import loadGrammar 2 | from ometa.runtime import EOFError 3 | import terml 4 | from terml.parser import TermLParser 5 | from terml.qnodes import ValueHole, PatternHole, QTerm, QSome, QFunctor 6 | 7 | 8 | def interleave(l, *r): 9 | if r: 10 | raise NotImplementedError() 11 | return l 12 | 13 | def _or(l, *r): 14 | if r: 15 | raise NotImplementedError() 16 | return l 17 | 18 | def some(value, quant): 19 | if quant: 20 | return QSome(value, quant) 21 | else: 22 | return value 23 | 24 | def dollarHole(i): 25 | return ValueHole(None, i, False) 26 | 27 | def patternHole(i): 28 | return PatternHole(None, i, False) 29 | 30 | def taggedHole(t, h): 31 | return h.__class__(t, h.name, h.isFunctorHole) 32 | 33 | def leafInternal(tag, 
def makeTerm(t, args=None, span=None):
    """
    Build a quasiterm from a functor and an optional argument list.

    :param t: A functor (QFunctor, hole, or QTerm).
    :param args: A sequence of child quasiterms, or None for a bare
        functor.
    :param span: Optional source span for the resulting term.
    :raises ValueError: if a literal term is given arguments.
    """
    if args is None:
        return t
    else:
        if isinstance(t, QTerm):
            # NOTE(review): parser.makeTerm tests ``t.data is not
            # None`` here; this truthiness test treats a literal 0/""
            # term as a plain functor -- confirm whether intended.
            if t.data:
                if not args:
                    return t
                else:
                    raise ValueError("Literal terms can't have arguments")
        return QTerm(t.asFunctor(), None, args and tuple(args), span)
uri = '<':b uriBody*:uriChars '>':e -> concat(b, join(uriChars), e)
-> cons(t, ts) 77 | | -> []) 78 | 79 | tupleTerm = ws '[' argList:a ws ']' -> Tuple(a) 80 | 81 | bagTerm = ws '{' argList:a ws '}' -> Bag(a) 82 | 83 | labelledBagTerm = functor:f bagTerm:b -> LabelledBag(f, b) 84 | 85 | extraTerm = tupleTerm | labelledBagTerm | bagTerm | baseTerm 86 | 87 | attrTerm = extraTerm:k ws ':' extraTerm:v -> Attr(k, v) 88 | 89 | term = ws (attrTerm | extraTerm) 90 | -------------------------------------------------------------------------------- /terml/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pyga/parsley/c89b3f0e09a9501f285a14ae446a77a56ee99942/terml/test/__init__.py -------------------------------------------------------------------------------- /terml/test/test_quasiterm.py: -------------------------------------------------------------------------------- 1 | 2 | from unittest import TestCase 3 | from terml.parser import parseTerm as term 4 | from terml.quasiterm import quasiterm 5 | 6 | class QuasiTermSubstituteTests(TestCase): 7 | 8 | def test_basic(self): 9 | x = quasiterm("foo($x, $y)").substitute({"x": 1, "y": term("baz")}) 10 | self.assertEqual(x, term("foo(1, baz)")) 11 | y = quasiterm("foo($0, ${1})").substitute([1, term("baz")]) 12 | self.assertEqual(y, term("foo(1, baz)")) 13 | 14 | 15 | def test_withArgs(self): 16 | x = quasiterm("$x(3)").substitute({"x": term("foo")}) 17 | self.assertEqual(x, term("foo(3)")) 18 | x = quasiterm("foo($x)").substitute({"x": term("baz(3)")}) 19 | self.assertEqual(x, term("foo(baz(3))")) 20 | self.assertRaises(TypeError, quasiterm("$x(3)").substitute, 21 | {"x": term("foo(3)")}) 22 | 23 | 24 | class QuasiTermMatchTests(TestCase): 25 | 26 | def test_simple(self): 27 | self.assertEqual(quasiterm("@foo()").match("hello"), 28 | {"foo": term('hello')}) 29 | self.assertEqual(quasiterm("@foo").match("hello"), 30 | {"foo": term('"hello"')}) 31 | self.assertEqual(quasiterm("@foo").match(term("hello")), 32 | {"foo": 
# ===== terml/test/test_terml.py =====
import unittest
from ometa.runtime import ParseError
from terml.nodes import Tag, Term, coerceToTerm, TermMaker, termMaker
from terml.parser import TermLParser, character, parseTerm


class TestCase(unittest.TestCase):
    """
    A TestCase whose assertRaises returns the caught exception.
    """

    def assertRaises(self, ex, f, *args, **kwargs):
        """
        Assert that f(*args, **kwargs) raises ex.

        Unlike the stock implementation, the exception instance is
        returned so callers can inspect its attributes (see
        test_leftovers, which checks the parse-error position).
        """
        try:
            f(*args, **kwargs)
        except ex as e:
            return e
        else:
            # self.fail rather than a bare `assert`, so the check still
            # fires when Python runs with -O (assert is stripped there).
            self.fail("%r didn't raise %r" % (f, ex))


class TermMakerTests(TestCase):
    def test_make(self):
        """
        Attribute access on a TermMaker builds the corresponding term.
        """
        m = TermMaker()
        t1 = m.Foo(1, 'a', m.Baz())
        self.assertEqual(t1, parseTerm('Foo(1, "a", Baz)'))


class ParserTest(TestCase):
    """
    Test TermL parser rules.
    """

    def getParser(self, rule):
        """
        Return a function parsing a source string with the given rule.
        """
        def parse(src):
            p = TermLParser(src)
            result, error = p.apply(rule)
            return result
        return parse

    def test_literal(self):
        """
        Literals are parsed to literal terms.
        """
        parse = self.getParser("literal")
        self.assertEqual(parse('"foo bar"'),
                         Term(Tag('.String.'), "foo bar", None, None))
        self.assertEqual(parse("'x'"),
                         Term(Tag('.char.'), 'x', None, None))
        self.assertEqual(parse("0xDECAFC0FFEEBAD"),
                         Term(Tag('.int.'), 0xDECAFC0FFEEBAD, None, None))
        # A leading zero marks an octal literal in TermL.
        self.assertEqual(parse("0755"),
                         Term(Tag('.int.'), 0o755, None, None))
        self.assertEqual(parse("3.14159E17"),
                         Term(Tag('.float64.'), 3.14159E17, None, None))
        self.assertEqual(parse("1e9"),
                         Term(Tag('.float64.'), 1e9, None, None))
        self.assertEqual(parse("0"), Term(Tag(".int."), 0, None, None))
        self.assertEqual(parse("7"), Term(Tag(".int."), 7, None, None))
        self.assertEqual(parse("-1"), Term(Tag(".int."), -1, None, None))
        self.assertEqual(parse("-3.14"),
                         Term(Tag('.float64.'), -3.14, None, None))
        # Underscores are digit separators.
        self.assertEqual(parse("3_000"),
                         Term(Tag('.int.'), 3000, None, None))
        self.assertEqual(parse("0.91"),
                         Term(Tag('.float64.'), 0.91, None, None))
        self.assertEqual(parse("3e-2"),
                         Term(Tag('.float64.'), 3e-2, None, None))
        self.assertEqual(parse("'\\n'"),
                         Term(Tag('.char.'), character("\n"), None, None))
        self.assertEqual(parse('"foo\\nbar"'),
                         Term(Tag('.String.'), "foo\nbar", None, None))
        self.assertEqual(parse("'\\u0061'"),
                         Term(Tag('.char.'), character("a"), None, None))
        self.assertEqual(parse('"z\141p"'),
                         Term(Tag('.String.'), "zap", None, None))
        self.assertEqual(parse('"x\41"'),
                         Term(Tag('.String.'), "x!", None, None))
        # A backslash-newline inside a string is a line continuation.
        self.assertEqual(parse('"foo\\\nbar"'),
                         Term(Tag('.String.'), "foobar", None, None))

    def test_simpleTag(self):
        """
        Tags are parsed properly.
        """
        parse = self.getParser("tag")
        self.assertEqual(parse("foo"), Tag("foo"))
        self.assertEqual(parse('::"foo"'), Tag('::"foo"'))
        self.assertEqual(parse("::foo"), Tag('::foo'))
        self.assertEqual(parse("foo::baz"), Tag('foo::baz'))
        self.assertEqual(parse('foo::"baz"'), Tag('foo::"baz"'))
        self.assertEqual(parse("biz::baz::foo"), Tag('biz::baz::foo'))
        self.assertEqual(parse("foo_yay"), Tag('foo_yay'))
        self.assertEqual(parse("foo$baz32"), Tag('foo$baz32'))
        self.assertEqual(parse("foo-baz.19"), Tag('foo-baz.19'))

    def test_simpleTerm(self):
        """
        Kernel syntax for terms is parsed properly.
        """
        parse = self.getParser("baseTerm")
        self.assertEqual(parse("x"), Term(Tag("x"), None, None, None))
        self.assertEqual(parse("x()"), Term(Tag("x"), None, [], None))
        self.assertEqual(parse("x(1)"),
                         Term(Tag("x"), None,
                              (Term(Tag(".int."), 1, None, None),),
                              None))
        self.assertEqual(parse("x(1, 2)"),
                         Term(Tag("x"), None,
                              (Term(Tag(".int."), 1, None, None),
                               Term(Tag(".int."), 2, None, None)),
                              None))
        self.assertEqual(parse("1"), Term(Tag(".int."), 1, None, None))
        self.assertEqual(parse('"1"'), Term(Tag(".String."), "1", None, None))
        # Literal data may not carry an argument list.
        self.assertRaises(ValueError, parse, "'x'(x)")
        self.assertRaises(ValueError, parse, '3.14(1)')
        self.assertRaises(ValueError, parse, '"foo"(x)')
        self.assertRaises(ValueError, parse, "1(2)")

    def test_fullTerm(self):
        """
        Shortcut syntax for terms is handled.
        """
        self.assertEqual(parseTerm("[x, y, 1]"), parseTerm(".tuple.(x, y, 1)"))
        self.assertEqual(parseTerm("{x, y, 1}"), parseTerm(".bag.(x, y, 1)"))
        self.assertEqual(parseTerm("f {x, y, 1}"), parseTerm("f(.bag.(x, y, 1))"))
        self.assertEqual(parseTerm("a: b"), parseTerm(".attr.(a, b)"))
        self.assertEqual(parseTerm('"a": b'), parseTerm('.attr.("a", b)'))
        self.assertEqual(parseTerm('a: [b]'), parseTerm('.attr.(a, .tuple.(b))'))

    def test_multiline(self):
        """
        Terms spread across multiple lines are parsed correctly.
        """
        single = parseTerm('foo(baz({x: "y", boz: 42}))')
        multi = parseTerm(
            """foo(
                 baz({
                      x: "y",
                      boz: 42}
        ))""")
        self.assertEqual(multi, single)

    def test_leftovers(self):
        """
        Trailing input past a complete term is a ParseError that reports
        where parsing stopped.
        """
        e = self.assertRaises(ParseError, parseTerm, "foo(x) and stuff")
        self.assertEqual(e.position, 7)

    def test_unparse(self):
        """
        repr() of a parsed term round-trips back to its source text.
        """
        def assertRoundtrip(txt):
            self.assertEqual('term(%r)' % (txt,), repr(parseTerm(txt)))
        cases = ["1", "3.25", "f", "f(1)", "f(1, 2)", "f(a, b)",
                 "{a, b}", "[a, b]", "f{1, 2}", '''{"name": "Robert", attrs: {'c': 3}}''']
        for case in cases:
            assertRoundtrip(case)

    def test_coerce(self):
        """
        Python data structures are coerced to the equivalent terms.
        """
        self.assertEqual(
            coerceToTerm({3: 4, "a": character('x'), (2, 3): [4, 5]}),
            parseTerm('{"a": \'x\', 3: 4, [2, 3]: [4, 5]}'))

    def test_hash(self):
        """
        Equal terms hash equally.
        """
        t = TermMaker()
        a = t.Arbitrary('foo')
        b = t.Arbitrary('foo')
        self.assertEqual(hash(a), hash(b))


# ===== test_parsley.py =====
import parsley


def wrapperFactory(addition):
    """Return a one-argument wrapper pairing `addition` with its input."""
    def wrapper(wrapped):
        return addition, wrapped
    return wrapper


def nullFactory(*args):
    """A base factory that simply returns its arguments as a tuple."""
    return args


class StackTestCase(unittest.TestCase):
    def test_onlyBase(self):
        "stack can be called with no wrappers."
        fac = parsley.stack(nullFactory)
        self.assertEqual(fac('a'), ('a',))

    def test_oneWrapper(self):
        "stack can be called with one wrapper."
        fac = parsley.stack(wrapperFactory(0), nullFactory)
        self.assertEqual(fac('a'), (0, ('a',)))

    def test_tenWrappers(self):
        "stack can be called with ten wrappers."
        args = []
        result = 'a',
        for x in range(10):
            args.append(wrapperFactory(x))
            # Wrappers apply outermost-first, so the expected nesting is
            # built from the inside out.
            result = 9 - x, result
        args.append(nullFactory)
        fac = parsley.stack(*args)
        self.assertEqual(fac('a'), result)

    def test_failsWithNoBaseSender(self):
        "stack does require at least the base factory."
        self.assertRaises(TypeError, parsley.stack)

    def test_senderFactoriesTakeOneArgument(self):
        "The callable returned by stack takes exactly one argument."
        fac = parsley.stack(nullFactory)
        self.assertRaises(TypeError, fac)
        self.assertRaises(TypeError, fac, 'a', 'b')