├── technotes ├── end-html.txt ├── new-expression-parser.txt ├── template-creator.pl ├── tracking.txt ├── start-html.txt ├── scratchpad.txt ├── sidebar.html ├── pydoc-analytics.pl ├── graphing.txt ├── formatter.txt ├── railroad.txt └── web-template.html ├── .gitignore ├── MANIFEST.in ├── upload-docs ├── README.txt ├── docs ├── .gitignore ├── parcon-download.md ├── index.html ├── parcon-examples.md ├── parcon-tutorial.md └── parcon-donate.md ├── parcon ├── options.py ├── socket.py ├── pargon │ ├── nbo.py │ └── __init__.py ├── tests.py ├── binary.py ├── railroad │ ├── regex.py │ ├── __init__.py │ └── raildraw.py ├── testframework.py ├── bnf.py ├── graph.py ├── ordered_dict.py ├── static.py └── pargen │ └── __init__.py ├── generate └── setup.py /technotes/end-html.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /dist 2 | /MANIFEST 3 | *.py[co] 4 | .project 5 | .pydevproject 6 | 7 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.txt setup.py 2 | recursive-include parcon *.py 3 | recursive-include docs * -------------------------------------------------------------------------------- /upload-docs: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | scp docs/* jcp@opengroove.org:/var/www/www.opengroove.org/parcon/ 4 | -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- 1 | Parcon is a Python parser combinator library written by Alexander Boyd. 
It's 2 | designed to be easy to learn, easy to use, and to provide informative error 3 | messages. 4 | 5 | Parcon also allows parsing using BNF grammars. 6 | 7 | -------------------------------------------------------------------------------- /technotes/new-expression-parser.txt: -------------------------------------------------------------------------------- 1 | from parcon import rational, Forward, InfixExpr 2 | import operator 3 | expr = Forward() 4 | number = rational[float] 5 | term = number | "(" + expr + ")" 6 | term = InfixExpr(term, [("*", operator.mul), ("/", operator.truediv)]) 7 | term = InfixExpr(term, [("+", operator.add), ("-", operator.sub)]) 8 | expr << term(name="expr") 9 | expr.draw_productions_to_png({}, "/home/boydam/test.png") 10 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | /parcon-examples.html 2 | /parcon-tutorial.html 3 | /parcon.pargen.html 4 | /parcon.html 5 | /parcon.graph.html 6 | /parcon.options.html 7 | /parcon.ordered_dict.html 8 | /parcon.pargon.html 9 | /parcon.pargon.nbo.html 10 | /parcon.railroad.html 11 | /parcon.railroad.raildraw.html 12 | /parcon.railroad.regex.html 13 | /parcon.static.html 14 | /parcon.testframework.html 15 | /parcon.tests.html 16 | /parcon-donate.html 17 | /parcon-download.html 18 | -------------------------------------------------------------------------------- /parcon/options.py: -------------------------------------------------------------------------------- 1 | 2 | class Options(object): 3 | def __init__(self, m, d={}, **defaults): 4 | self.values = {} 5 | self.values.update(defaults) 6 | self.values.update(d) 7 | self.values.update(m) 8 | 9 | def __getattr__(self, name): 10 | return self.values[name] 11 | 12 | __getitem__ = __getattr__ 13 | 14 | def __iter__(self): 15 | for k, v in self.values: 16 | yield k, v 
-------------------------------------------------------------------------------- /docs/parcon-download.md: -------------------------------------------------------------------------------- 1 | ### Download 2 | 3 | Parcon is available on the Python Package Index. The easiest way to install it is to open up a command prompt or a terminal and run this: 4 | 5 | pip install parcon 6 | 7 | The PyPI page on Parcon is here. There's a link on that page to download the latest version of Parcon, if you want to download it by hand. 8 | 9 | Both of those methods will get you Pargen and Static, two libraries provided as submodules of Parcon, as well. 10 | -------------------------------------------------------------------------------- /technotes/template-creator.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | # This script, like pydoc-analytics.pl, provided by Alyx Wolcott, with some 4 | # modifications by Alexander Boyd to template the sidebar as well as the main 5 | # page content. 6 | 7 | use strict; 8 | use warnings; 9 | use File::Slurp; 10 | 11 | my $outer = read_file($ARGV[0]); 12 | my $inner = read_file($ARGV[1]); 13 | my $sidebar = read_file($ARGV[2]); 14 | 15 | chomp $inner; 16 | 17 | $outer =~ s!INSERT_CONTENT_HERE!$inner!; 18 | $outer =~ s!INSERT_SIDEBAR_HERE!$sidebar!; 19 | 20 | open(my $new, '>', $ARGV[3]); 21 | 22 | print {$new} $outer; 23 | 24 | -------------------------------------------------------------------------------- /technotes/tracking.txt: -------------------------------------------------------------------------------- 1 | 20 | -------------------------------------------------------------------------------- /parcon/socket.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | This module provides classes that make it easier to parse data from network 4 | sockets or files. 
5 | """ 6 | 7 | class LazyString(object): 8 | def __init__(self, more_function): 9 | self.more = more_function 10 | self.buffer = "" 11 | 12 | def __len__(self): 13 | return 2**30 14 | 15 | def __getitem__(self, item): 16 | if isinstance(item, slice): 17 | limit = item.stop 18 | else: 19 | limit = item + 1 20 | self.get_more(limit) 21 | return self.buffer[item] 22 | 23 | def get_more(self, limit): 24 | while len(self.buffer) < limit: 25 | self.buffer += self.more(limit - len(self.buffer)) 26 | -------------------------------------------------------------------------------- /technotes/start-html.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 22 | 23 | -------------------------------------------------------------------------------- /technotes/scratchpad.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | A JSON parser: 4 | 5 | from parcon import * 6 | import operator 7 | cat_dicts = lambda x, y: dict(x.items() + y.items()) 8 | json = Forward() 9 | number = (+Digit() + -(SignificantLiteral(".") + +Digit()))[flatten]["".join][float] 10 | boolean = Literal("true")[lambda x: True] | Literal("false")[lambda x: False] 11 | string = ('"' + Exact(ZeroOrMore(AnyChar() - CharIn('\\"'))) + '"')["".join] 12 | null = Literal("null")[lambda x: None] 13 | pair = (string + ":" + json[lambda x: (x,)])[lambda x: {x[0]: x[1]}] 14 | json_object = ("{" + Optional(InfixExpr(pair, [(",", cat_dicts)]), {}) + "}") 15 | json_list = ("[" + Optional(InfixExpr(json[lambda x: [x]], [(",", operator.add)]), []) + "]") 16 | json << (json_object | json_list | string | boolean | null | number) 17 | -------------------------------------------------------------------------------- /generate: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd docs 4 | 5 | 6 | for DOC in parcon-tutorial parcon-examples parcon-donate parcon-download 7 | do 8 | pandoc -o 
${DOC}-temp.html ${DOC}.md 9 | ../technotes/template-creator.pl ../technotes/web-template.html ${DOC}-temp.html ../technotes/sidebar.html ${DOC}.html 10 | rm ${DOC}-temp.html 11 | done 12 | 13 | 14 | for PACKAGE in parcon parcon.pargen parcon.static parcon.pargon parcon.pargon.nbo \ 15 | parcon.graph parcon.testframework parcon.tests parcon.railroad parcon.railroad.raildraw \ 16 | parcon.railroad.regex parcon.options parcon.ordered_dict 17 | do 18 | PYTHONPATH=.. pydoc -w $PACKAGE 19 | ../technotes/pydoc-analytics.pl ${PACKAGE}.html ${PACKAGE}-new.html 20 | rm ${PACKAGE}.html 21 | mv ${PACKAGE}-new.html ${PACKAGE}.html 22 | done 23 | 24 | 25 | cd .. 26 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 22 | 23 | parcon-tutorial.html 24 | -------------------------------------------------------------------------------- /parcon/pargon/nbo.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module will have parsers for parsing binary protocols in it. Parcon can 3 | already parse binary data; this module will provide some Parcon parsers 4 | specifically oriented toward that purpose. Its most important feature will be 5 | a set of parsers to parse integers and other data in network byte order, hence 6 | this module's name (NBO = Network Byte Order). It will have parsers for parsing 7 | other binary things too, though, such as length-specified strings or packets in 8 | protocols that have a defined length. 9 | 10 | This module will also contain formatters for all of the binary types that it 11 | can parse. In fact, I'm considering having all of the parsers be subclasses of 12 | parcon.pargon.ParserFormatter. The only difficulty is that it would make 13 | assembling formatters difficult since ParserFormatters act like parsers when 14 | used as arguments to operators. 
So this needs some thought. 15 | 16 | (if you can't tell, the nbo module is still in planning :-) ) 17 | """ -------------------------------------------------------------------------------- /technotes/sidebar.html: -------------------------------------------------------------------------------- 1 | 25 | Send an email to alex@opengroove.org if 26 | you have any questions or problems while using Parcon. 27 | -------------------------------------------------------------------------------- /technotes/pydoc-analytics.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | # Thanks to Alyx Wolcott for this script 4 | 5 | use strict; 6 | use warnings; 7 | use File::Slurp; 8 | 9 | my $original = < 11 | EOSEARCH 12 | 13 | my $change = < 15 | 16 | var _gaq = _gaq || []; 17 | // START JCP ADD 18 | if(window.location.href.indexOf("http://www.opengroove.org/") == 0) { 19 | // END JCP ADD 20 | _gaq.push(['_setAccount', 'UA-6079740-6']); 21 | _gaq.push(['_trackPageview']); 22 | 23 | (function() { 24 | var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true; 25 | ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'; 26 | var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); 27 | })(); 28 | // START JCP ADD 29 | } 30 | // END JCP ADD 31 | 32 | 33 | 34 | EOCHANGE 35 | 36 | chomp($original); 37 | chomp($change); 38 | 39 | my $file = read_file($ARGV[0]); 40 | 41 | $file =~ s/$original/$change/; 42 | 43 | open(my $out, '>', $ARGV[1]); 44 | 45 | print {$out} $file; 46 | 47 | -------------------------------------------------------------------------------- /docs/parcon-examples.md: -------------------------------------------------------------------------------- 1 | This file contains a bunch of examples of things you can do with Parcon. 
More 2 | examples are present in the [Parcon module documentation](parcon.html). 3 | 4 | ### Expression evaluator 5 | 6 | from parcon import rational, Forward, InfixExpr 7 | from decimal import Decimal 8 | import operator 9 | expr = Forward() 10 | number = rational[Decimal] 11 | term = number | "(" + expr + ")" 12 | term = InfixExpr(term, [("*", operator.mul), ("/", operator.truediv)]) 13 | term = InfixExpr(term, [("+", operator.add), ("-", operator.sub)]) 14 | expr << term 15 | 16 | This implements a simple expression evaluator, and shows how Parcon allows the 17 | evaluation logic to be specified as part of the parser. It uses Python's 18 | decimal module for arbitrary-precision arithmetic. This expression parser can 19 | then be used thus: 20 | 21 | print expr.parse_string("1+2") # prints 3 22 | print expr.parse_string("1+2+3") # prints 6 23 | print expr.parse_string("1+2+3+4") # prints 10 24 | print expr.parse_string("3*4") # prints 12 25 | print expr.parse_string("5+3*4") # prints 17 26 | print expr.parse_string("(5+3)*4") # prints 32 27 | print expr.parse_string("10/4") # prints 2.5 28 | -------------------------------------------------------------------------------- /technotes/graphing.txt: -------------------------------------------------------------------------------- 1 | Graphs... 2 | 3 | I'm thinking I'll create my own graph class. Really simple like. Parsers and formatters, and perhaps static types in the future, can all generate instances of this class. 4 | 5 | Then there'll be a module, maybe parcon.graph, that contains functions for graphing things using various graphing systems. The two I'm thinking of having for starters are graphviz and networkx+matplotlib. 6 | 7 | Ok so... 8 | 9 | When we ask something to graph itself... 10 | 11 | well... 12 | 13 | We should have a method for parsers, do_graph. This method takes an AGraph. It should add itself to the graph under the name id(self). It should also add any other auxiliary nodes that it needs. 
It should then add edges going from itself to things that it depends on. It then returns a list of all of the things (things that should also provide do_graph) that it depends on. 14 | 15 | Actually, we should just have all parsers that support graphing subclass from Graphable, which has that function as one that has to be overridden. It then also has graph, which does the actual graphing stuff; specifically, it creates an AGraph and a set of object ids that we've already visited, then starts with self and calls do_graph, then, for each returned object that hasn't already been visited yet (as per its id being in the list of visited objects), we add it to the graph. 16 | 17 | Then we return the graph. 18 | 19 | This Graphable class would be in parcon.graph. This would be independent of any of the modules, most likely including parcon.static in case I decide to give static the ability to graph itself. parcon would then import parcon.graph and have all of its parsers, or at least most of them, subclass from Graphable. 20 | 21 | -------------------------------------------------------------------------------- /parcon/pargon/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | A set of functions and classes that make it easier to use Parcon and Pargen 3 | together. Pargon is a portmanteau of Parcon and Pargen. 4 | """ 5 | 6 | import parcon 7 | from parcon import pargen 8 | from parcon import static 9 | 10 | parser_type = static.compile(parcon.Parser) 11 | formatter_type = static.compile(pargen.Formatter) 12 | 13 | 14 | class ParserFormatter(parcon.Parser, pargen.Formatter): 15 | """ 16 | A class that allows creating objects which are both parsers and formatters. 17 | Such a parser/formatter is created by passing in a Parser and a Formatter. 
18 | The resulting instance of ParserFormatter, when used as a parser, acts like 19 | the parser passed to it, and when used as a formatter, acts like the 20 | formatter passed to it. 21 | 22 | This class should only be used when you want to create a parser/formatter 23 | that acts like a predetermined parser and a predetermined formatter. If you 24 | want to implement a custom parser/formatter yourself, you can just subclass 25 | both Parser and Formatter in your new class. 26 | 27 | When a ParserFormatter is used as an argument to an operator such as + or 28 | |, it behaves as a parser. If you need it to behave as a formatter, you'll 29 | probably want to wrap it in a pargen.Forward instance. 30 | """ 31 | def __init__(self, parser, formatter): 32 | parser_type.check_matches(parser) 33 | formatter_type.check_matches(formatter) 34 | self.parser = parser 35 | self.formatter = formatter 36 | 37 | def parse(self, text, position, end, space): 38 | return self.parser.parse(text, position, end, space) 39 | 40 | def format(self, input): 41 | return self.formatter.format(input) 42 | 43 | 44 | class Literal(ParserFormatter): 45 | """ 46 | A parser/formatter that behaves like Parcon and Pargen's Literal classes. 
47 | """ 48 | def __init__(self, text): 49 | ParserFormatter.__init__(self, parcon.Literal(text), pargen.Literal(text)) 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /parcon/tests.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from parcon.testframework import * 4 | import parcon 5 | from parcon import pargen 6 | from parcon import static 7 | 8 | tests = [] 9 | classes_tested = set() 10 | 11 | test = TestSuite() 12 | 13 | 14 | @test(parcon.Return) 15 | def case(): 16 | x = parcon.Return("test") 17 | assert x.parse_string("") == "test" 18 | x = parcon.Return({1:2, "3":"4"}) 19 | assert x.parse_string("") == {1:2, "3":"4"} 20 | x = parcon.Return(None) 21 | assert x.parse_string("") is None 22 | check_raises(Exception, x.parse_string, "test") 23 | 24 | 25 | @test(parcon.Literal) 26 | def case(): #@DuplicatedSignature 27 | x = parcon.Literal("hello") 28 | assert x.parse_string("hello") is None 29 | check_raises(Exception, x.parse_string, "bogus") 30 | 31 | 32 | @test(parcon.SignificantLiteral) 33 | def case(): #@DuplicatedSignature 34 | x = parcon.SignificantLiteral("hello") 35 | assert x.parse_string("hello") == "hello" 36 | check_raises(Exception, x.parse_string, "bogus") 37 | 38 | 39 | @test(parcon.Translate) 40 | def case(): #@DuplicatedSignature 41 | x = parcon.Translate(parcon.SignificantLiteral("5"), int) 42 | assert x.parse_string("5") == 5 43 | x = parcon.SignificantLiteral("5")[int] 44 | assert x.parse_string("5") == 5 45 | 46 | 47 | def run_tests(): 48 | targets = set() 49 | targets |= set(subclasses_in_module(parcon.Parser, ("parcon",))) 50 | targets |= set(subclasses_in_module(pargen.Formatter, ("parcon.pargen",))) 51 | targets |= 
set(subclasses_in_module(static.StaticType, ("parcon.static",))) 52 | test.warn_missing_targets(targets) 53 | passed, failed = test.run_tests() 54 | print("-" * 75) 55 | print("%s tests passed" % passed) 56 | print("%s tests failed" % failed) 57 | print("-" * 75) 58 | print() 59 | if failed == 0: 60 | print("TESTING SUCCESSFUL") 61 | else: 62 | print("TESTING FAILED") 63 | 64 | 65 | if __name__ == "__main__": 66 | run_tests() 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /docs/parcon-tutorial.md: -------------------------------------------------------------------------------- 1 | ### Introduction 2 | 3 | **Parcon** is a parser library written by Alexander Boyd. It's designed to be fast, easy to use, easy to learn, and to provide informative error messages. 4 | 5 | **Pargen**, which is provided as a submodule of Parcon, is a formatter 6 | library. It's much the opposite of Parcon: while Parcon is used to parse text 7 | into various objects, Pargen is used to format objects into text. As an 8 | example, if you wanted to reimplement Python's json module, you would use 9 | Parcon to implement json.loads and Pargen to implement json.dumps. 10 | 11 | **Static**, which is also provided as a submodule of Parcon, is a static typing 12 | library and Python object pattern library. It lets you build patterns that 13 | match Python objects based on their type, their attributes, certain properties 14 | such as whether or not the object is a sequence, the types that make up the 15 | object's values if the object is a sequence, and so on. 16 | 17 | ### Donations 18 | 19 | Parcon is developed by Alex, a student who develops Parcon on time that could otherwise be spent working at his job to earn money to pay for college. 
Donations are therefore greatly appreciated. 20 | 21 | 22 | 23 | 26 | 27 |
24 | 25 |
28 | 29 | ### Download 30 | 31 | See the downloads page for information on how to download/install Parcon, Pargen, and Static. 32 | 33 | ### Documentation 34 | 35 | The main source of documentation for Parcon is its module documentation. The same is true for Pargen and Static, although they're not quite as well documented as Parcon is. 36 | 37 | ### Blog 38 | 39 | The Parcon Blog is Parcon's official blog. 40 | 41 | ### Examples 42 | 43 | Parcon examples are provided in Parcon's module documentation and on The Parcon Blog. 44 | 45 | ### Source 46 | 47 | Parcon is open-source, released under the terms of the GNU Lesser General Public License. Source code is available on GitHub here. 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /parcon/binary.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | A module that provides parsers to allow Parcon to parse binary data. Some of 4 | the main parsers it provides are: 5 | 6 | integer: parses four bytes in big-endian order and returns an int. 7 | short: parses two bytes in big-endian order and returns an int. 8 | byte: parses one byte and returns an int. 9 | u_integer, u_short, u_byte: same as integer, short, and byte, respectively, 10 | except that they treat the value as unsigned. 11 | 12 | This module also provides a class, PyStruct, which is created with a format 13 | specification (in the same format as that passed to Python's 14 | struct.pack/unpack). It parses any data that matches the specification, and 15 | returns the resulting values. 16 | """ 17 | 18 | import parcon 19 | import struct 20 | 21 | class PyStruct(parcon.Parser): 22 | """ 23 | A parser that takes a particular format specification as expected by 24 | Python's struct module's operations.
It parses any input that 25 | struct.unpack can successfully understand and returns the result as a list 26 | if more than one item was returned; otherwise it returns the single result 27 | that unpack produced. 28 | 29 | Note that the format specifier cannot contain "p" (a.k.a. a pascal string 30 | format) at present. I'll add support for this later. 31 | """ 32 | def __init__(self, format): 33 | self.format = format 34 | self.length = struct.calcsize(format) 35 | 36 | def parse(self, text, position, end, space): 37 | position = space.consume(text, position, end) 38 | if position + self.length > end: 39 | return parcon.failure([(position, parcon.ECustomExpectation("struct.unpack format " + repr(self.format)))]) 40 | result = struct.unpack(self.format, text[position:position+self.length]) 41 | if len(result) == 1: 42 | result = result[0] 43 | else: 44 | result = list(result) 45 | return parcon.match(position + self.length, result, (position + self.length, parcon.EUnsatisfiable())) 46 | 47 | def __repr__(self): 48 | return "PyStruct(%s)" % repr(self.format) 49 | 50 | 51 | integer = PyStruct(">i")(expected="four bytes (signed integer)") 52 | u_integer = PyStruct(">I")(expected="four bytes (unsigned integer)") 53 | short = PyStruct(">h")(expected="two bytes (signed short)") 54 | u_short = PyStruct(">H")(expected="two bytes (unsigned short)") 55 | byte = PyStruct(">b")(expected="one byte (signed byte)") 56 | u_byte = PyStruct(">B")(expected="one byte (unsigned byte)") 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /technotes/formatter.txt: -------------------------------------------------------------------------------- 1 | So, thinking about doing parcon.formatter, and how that would work... 2 | 3 | pargen... not a bad name for it 4 | 5 | I might do that. 
6 | 7 | In fact, I just did. It's now parcon.pargen. 8 | 9 | So... 10 | 11 | With parcon, a parser is handed an input string, the position at which to begin parsing, and optionally a space parser. The result is either failure or success with a result value and position at which parsing finished. (I'm deliberately omitting discussion of result expectations since such a concept would be too difficult to reproduce in pargen; it will, however, have some ability to generate an error message as to why a particular formatter failed.) 12 | 13 | With pargen, a formatter is handed an input object. It either succeeds or fails, similar to a parcon parser. If it fails, it provides an error message as to why it failed (it doesn't provide a corresponding position, since such a notion makes little sense in pargen). If it succeeds, it provides a string (the result), and the data that it did not consume. 14 | 15 | That sounds sensible. 16 | 17 | So, we have something like: 18 | 19 | class Formatter(object): 20 | def format(input): 21 | pass 22 | def result(text, remainder): 23 | pass 24 | def fail(message): 25 | pass 26 | 27 | Now, we have some formatters: 28 | 29 | ForEach(formatter): Expects a sequence as input. Iterates over all of its items, applying formatter to each one, then results in the concatenation of the outputs of the formatter. 30 | 31 | Delimited(formatter, delimiter): Same as ForEach, but delimiter (a string) will be put in between each result. 32 | 33 | Head(formatter): Expects a sequence as input. Gets the first item from the sequence, passes it through formatter, and returns the result, along with a list containing all but the first item in the input sequence as the remainder. 34 | 35 | Tail(formatter): Same as Head, but operates on the last item in the sequence instead of the first. 36 | 37 | Instance(some_type): Accepts a static type (as specified by the parcon.static module; ordinary Python class objects can thus be passed in). 
Succeeds, not consuming any input and producing an empty string as output, if the input matches the specified static type. Fails otherwise. 38 | 39 | And(formatter1, formatter2): Succeeds only if its first and second formatter both succeed, both operating on the original input given to And. Returns whatever its second formatter returns. 40 | 41 | Then(formatter1, formatter2): Applies its first formatter. If it fails, Then fails. Otherwise, applies its second formatter with the remainder of its first formatter. If it fails, Then fails. Otherwise, Then returns the concatenation of the results of its first and second formatters, along with the remainder of its second formatter. 42 | 43 | String(): Always succeeds. Returns str(value) for the value that's passed to it, with None as the remainder. 44 | 45 | Repr(): Same as String(), but returns repr(value) instead of str(value). 46 | 47 | 48 | -------------------------------------------------------------------------------- /docs/parcon-donate.md: -------------------------------------------------------------------------------- 1 | ### Donations 2 | 3 | Parcon is released free of charge for anyone to use. Its developer, Alex, is enrolled at the University of Utah, and he has to take time out of his day to develop Parcon, time that could otherwise be spent working at his job to earn money to pay for college. If you've found Parcon useful, a donation would be greatly appreciated. 4 | 5 | You can donate any of the following amounts, or an amount of your choosing. You do not need a PayPal account to donate. 6 | 7 | 10 | 11 | 12 | 13 | 21 | 29 | 37 | 45 | 53 | 54 | -------------------------------------------------------------------------------- /parcon/railroad/regex.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | This module knows how to convert some regular expressions into railroad 4 | objects.
5 | """ 6 | 7 | from parcon import railroad as rr 8 | import parcon as p 9 | 10 | regex_parser = None 11 | 12 | def _convert_repetition(construct, flag): 13 | if flag is None: 14 | return construct 15 | if flag == "*": 16 | return rr.Or(rr.Loop(construct, rr.Nothing()), rr.Nothing()) 17 | if flag == "+": 18 | return rr.Loop(construct, rr.Nothing()) 19 | if flag == "?": 20 | return rr.Or(construct, rr.Nothing()) 21 | raise ValueError 22 | 23 | 24 | def _translate_backslash(char): 25 | if char == "\\n": 26 | return "\n" 27 | if char == "\\r": 28 | return "\r" 29 | if char == "\\t": 30 | return "\t" 31 | return char 32 | 33 | 34 | def init_parser(): 35 | global regex_parser 36 | global component 37 | global alt_component 38 | global char 39 | global char_class 40 | global char_class_range 41 | global char_class_char 42 | global alternative 43 | expr = p.Forward() 44 | char_class_char = (p.AnyChar() - p.CharIn("^]"))[lambda x: rr.Token(rr.TEXT, x)] 45 | char_class_range = ((p.AnyChar() - p.CharIn("^-]")) + "-" + (p.AnyChar() - "-]"))[ 46 | lambda x: rr.Or(*[rr.Token(rr.TEXT, chr(c)) for c in range(ord(x[0]), ord(x[1])+1)])] 47 | char_class = ("[" + +(char_class_range | char_class_char) + "]")[ 48 | lambda x: rr.Or(*x) if len(x) != 1 else x[0]] 49 | char = (p.AnyChar() - p.CharIn("[]().|\\")) 50 | backslash = ("\\" + p.AnyChar())[_translate_backslash] 51 | chars = (+(backslash | char))[p.concat][lambda x: rr.Token(rr.TEXT, x)] 52 | matching_group = "(" + expr + ")" 53 | non_matching_group = "(?:" + expr + ")" 54 | component = char_class | chars | non_matching_group | matching_group 55 | alt_component = (component + p.Optional(p.CharIn("*+?"), (None,)))[ 56 | lambda x: _convert_repetition(*x)] 57 | alt_component = alt_component[...][lambda x: x[0] if len(x) == 1 else rr.Then(*x)] 58 | alternative = p.InfixExpr(alt_component, [("|", rr.Or)]) 59 | expr << alternative 60 | regex_parser = expr 61 | 62 | 63 | def convert_regex(regex): 64 | """ 65 | Converts a regex, specified 
as a string, to an instance of one of the 66 | subclasses of parcon.railroad.Component. If the specified regex contains 67 | constructs that this module does not understand, None will be returned. 68 | """ 69 | if regex_parser is None: 70 | init_parser() 71 | try: 72 | return regex_parser.parse_string(regex, whitespace=p.Invalid()) 73 | except p.ParseException: 74 | return None 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /parcon/testframework.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from traceback import print_exc 3 | 4 | class TestException(Exception): 5 | pass 6 | 7 | 8 | def check_raises(*args, **kwargs): 9 | """ 10 | check_raises(exception_type, function, ...) 11 | 12 | Calls function, passing in the rest of the positional arguments and keyword 13 | arguments. If the function does not raise an exception, check_raises raises 14 | an exception indicating that the specified function failed to do so. If the 15 | function raises an exception not of the specified exception_type (which can 16 | be a tuple of multiple types if desired), another exception is raised 17 | indicating the problem. Otherwise, check_raises returns None. 
18 | """ 19 | exception_type = args[0] 20 | function = args[1] 21 | try: 22 | function(*args[2:], **kwargs) 23 | raise TestException(str(function) + " failed to raise " + str(exception_type)) 24 | except Exception as e: 25 | if not isinstance(e, exception_type): 26 | print_exc() 27 | raise TestException(str(function) + " was supposed to raise an " + 28 | "exception of type " + str(exception_type) + 29 | " but raised " + str(type(e)) + " instead") 30 | 31 | 32 | def subclasses_in_module(c, modules=None, original=True): 33 | result = [] 34 | if original: 35 | if modules is None or c.__module__ in modules: 36 | result.append(c) 37 | subclasses = c.__subclasses__() 38 | for subclass in subclasses: 39 | result += subclasses_in_module(subclass, modules) 40 | return result 41 | 42 | 43 | class TestSuite(object): 44 | def __init__(self): 45 | self.tests = [] 46 | self.targets = set() 47 | 48 | def __call__(self, target): 49 | def decorator(function): 50 | self.tests.append(function) 51 | self.targets.add(target) 52 | function.testing_target = target 53 | return function 54 | return decorator 55 | 56 | def warn_missing_targets(self, targets): 57 | if len(targets - self.targets) > 0: 58 | print("WARNING: missing tests for " + str(list(targets - self.targets))) 59 | print("-" * 75) 60 | 61 | def run_tests(self): 62 | passed = 0 63 | failed = 0 64 | for test in self.tests: 65 | target = getattr(test, "testing_target", None) 66 | target_desc = str(target) if target is not None else "(no target)" 67 | if target is not None and getattr(target, "__module__", None) is not None: 68 | target_desc += " in module " + target.__module__ 69 | try: 70 | test() 71 | print("TEST PASSED: " + test.__name__ + " testing " + target_desc) 72 | passed += 1 73 | except: 74 | print("TEST FAILED: " + test.__name__ + " testing " + target_desc) 75 | print("Exception for the above failure:") 76 | print_exc() 77 | failed += 1 78 | return passed, failed 79 | 80 | 81 | 82 | 83 | 
# --------------------------------------------------------------------------------
# /parcon/bnf.py:
# --------------------------------------------------------------------------------
'''
A BNF-to-Parcon converter. This module provides a method, convert, which
converts a BNF grammar passed as a string into a dictionary of Parcon parsers,
one for each nonterminal in the BNF grammar, whose value is a Parcon parser
that will parse text conforming to the specified nonterminal. For example, a
very simple numeric expression evaluator using only the bnf module (and not any
other Parcon modules) could look like this:

bnf = """
<expression> ::= <term> | <term> "+" <expression>
<term>       ::= <factor> | <factor> "*" <term>
<factor>     ::= <number> | "(" <expression> ")"
<number>     ::= <digit> | <digit> <number>
<digit>      ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
"""
parsers = convert(bnf)
parser = parsers["expression"]
...continue this example...

NOTE(review): no function named convert is defined in this file as it stands;
the conversion entry point that is defined here is bnf_to_parcon, which takes
the already-parsed list of Production objects. Confirm where convert lives.

Note that grammars must not be left-recursive; attempting to use a
left-recursive grammar will cause an infinite loop when attempting to parse text
that (ostensibly) conforms to the BNF grammar specified. If anyone wants to fix
this, feel free; see github.com/javawizard/parcon.
'''

# noinspection PyUnresolvedReferences
from six.moves import reduce
import parcon as p
from collections import namedtuple

# Parsed representation of a BNF grammar
Production = namedtuple("Production", ["name", "alternatives"])
Alternative = namedtuple("Alternative", ["values"])
Reference = namedtuple("Reference", ["name"])
String = namedtuple("String", ["value"])

# Spaces and tabs only (no newlines)
whitespace = p.Regex(r"[ \t]+")

# Grammar for BNF itself
equals = p.Literal("::=")
# A nonterminal name in angle brackets, e.g. <expression>
ref = (p.Literal("<") + (+p.CharNotIn(">"))["".join](desc="Any char except >") + ">")(name="name")
production_start = ref + equals
# A double-quoted terminal
string = p.Exact('"' + p.CharNotIn('"')[...](desc='Any char except "') + '"')["".join][String]
# A reference only counts as a component if it is not the start of the next
# production (i.e. it is not immediately followed by "::=")
component = (ref & p.Not(p.Present(ref + equals)))[Reference] | string
alternative = (+component)[Alternative](name="alternative")
production = (production_start + p.InfixExpr(alternative[lambda a: [a]], [("|", lambda a, b: a+b)]))[lambda x: Production(*x)](name="production")
productions = (+production)(name="bnf")


def bnf_to_parcon(productions):
    """
    Converts a sequence of Production objects into a dictionary that maps each
    production's name to a Parcon parser for that production.

    String components become p.SignificantLiteral parsers and Reference
    components refer to the parser of the production they name. A KeyError is
    raised if a Reference names a production that is not in productions, and a
    TypeError is raised for a component that is neither a String nor a
    Reference.
    """
    result = {}
    # First pass: create a Forward for every production so that productions can
    # refer to each other (and to themselves) regardless of definition order
    for name, alternatives in productions:
        result[name] = p.Forward()
    # Second pass: build the actual parser for each production
    for name, alternatives in productions:
        alternative_parsers = []
        for alternative in alternatives:
            component_parsers = []
            for component in alternative.values:
                if isinstance(component, String):
                    component_parsers.append(p.SignificantLiteral(component.value))
                elif isinstance(component, Reference):
                    component_parsers.append(result[component.name])
                else:
                    raise TypeError(type(component))
            # Chain the components of this alternative together in sequence
            alternative_parsers.append(reduce(p.Then, component_parsers))
        result[name] << p.First(alternative_parsers)[name](name=name)
    # Unwrap all of the forwards to make things somewhat more clear
    for name in result:
        result[name] = result[name].parser
    return result


# --------------------------------------------------------------------------------
# /setup.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python

# install_requires is a setuptools feature; plain distutils ignores it (and
# distutils no longer exists in Python 3.12+), so prefer setuptools and only
# fall back to distutils when setuptools is unavailable.
try:
    from setuptools import setup
except ImportError:
    from distutils.core import setup

# NOTE(review): the hyperlink targets in long_description (`text <url>`_) are
# missing from the text below; restore them from the original file.
setup(
    name="parcon",
    version="0.1.25",
    author="Alexander Boyd",
    author_email="alex@opengroove.org",
    url="http://www.opengroove.org/parcon/",
    description="A parser/formatter library that's easy to use and that provides informative error messages.",
    long_description=
"""
**Parcon** is a parser library. It can be used for parsing both normal
text and binary data. It's designed to be easy to use and to provide informative
error messages.

**Pargen**, which is provided as a submodule of Parcon, is a formatter
library. It's much the opposite of Parcon: while Parcon is used to parse text
into various objects, Pargen is used to format objects into text. As an
example, if you wanted to reimplement Python's json module, you would use
Parcon to implement json.loads and Pargen to implement json.dumps.

**Static**, which is also provided as a submodule of Parcon, is a static typing
library and Python object pattern library. It lets you build patterns that
match Python objects based on their type, their attributes, certain properties
such as whether or not the object is a sequence, the types that make up the
object's values if the object is a sequence, and so on.

All of the classes, and most of the functions, in Parcon are comprehensively
documented. The best place to look for help is in Parcon's module documentation.
Pargen's classes and functions and Static's classes and functions are also
documented, though not quite as well as Parcon's are at present.

Parcon grammars are written as Python expressions, so there's no need to learn
a separate language for writing grammars or anything like that.

Here's an example of a simple expression evaluator written using Parcon::

    from parcon import number, Forward, InfixExpr
    import operator
    expr = Forward()
    term = number[float] | "(" + expr + ")"
    term = InfixExpr(term, [("*", operator.mul), ("/", operator.truediv)])
    term = InfixExpr(term, [("+", operator.add), ("-", operator.sub)])
    expr << term(name="expr")

This expression evaluator can be used thus::

    print(expr.parse_string("1+2")) # prints 3
    print(expr.parse_string("1+2+3")) # prints 6
    print(expr.parse_string("1+2+3+4")) # prints 10
    print(expr.parse_string("3*4")) # prints 12
    print(expr.parse_string("5+3*4")) # prints 17
    print(expr.parse_string("(5+3)*4")) # prints 32
    print(expr.parse_string("10/4")) # prints 2.5

A syntax diagram can then be generated from it like this::

    expr.draw_productions_to_png({}, "syntax-expr.png")

which would draw `this image `_ to syntax-expr.png.

More examples can be found on `Parcon's blog `_,
and more information in general can be found at `Parcon's website `_.
Bug reports should be submitted to `the GitHub issue tracker `_.

Parcon is currently much more comprehensively documented than Pargen and Static
are. Improved documentation for Pargen and Static will come soon.
""",
    classifiers=[
        "Programming Language :: Python",
        "Programming Language :: Python :: 2.6",
        "Programming Language :: Python :: 2.7",
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)",
        "Operating System :: OS Independent",
        "Development Status :: 4 - Beta",
        "Intended Audience :: Developers",
        "Topic :: Software Development :: Libraries :: Python Modules",
        "Topic :: Text Processing"
    ],
    packages=["parcon", "parcon.pargen", "parcon.pargon", "parcon.railroad"],
    install_requires=["six"]
)

# --------------------------------------------------------------------------------
# /parcon/graph.py:
# --------------------------------------------------------------------------------

"""
A module that provides graphing support to Parcon and its associated libraries.
You most likely won't use this module directly; instead, you just call the
graph method on a Parcon parser (or other sort of object) that extends
Graphable as well (and all parsers included with Parcon do, or they will at
some point in the future, as will Pargen formatters and Static types).
"""

import subprocess
import json


def escape_string(string):
    """
    Returns str(string) escaped for use inside a double-quoted string in a
    .dot file. The escaping is done by JSON-encoding the string and stripping
    the surrounding quotes.
    """
    return json.dumps(str(string))[1:-1]


class Graphable(object):
    """
    A class that classes knowing how to graph themselves should subclass. The
    idea is that all parsers in parcon and, in the future, formatters from
    pargen and types from static, will extend this class.

    This class is intended to be used as a mixin; calling Graphable.__init__ is
    not necessary. The only requirement is that a subclass override do_graph.
    """
    def graph(self):
        """
        Graphs this Graphable object by calling its do_graph and the do_graph
        functions defined by all of the things that this Graphable depends on.
        The result will be a Graph object.

        Each node in the resulting Graph will be named after its respective
        object's identity, a.k.a. the value returned by the built-in id
        function.

        The quickest way to use this would be to do something like this:

        something.graph().draw("example.png")

        For the draw method to work, however, you must have the dot program
        (which is part of Graphviz) installed.
        """
        graph = Graph()
        visited = set()
        # Breadth-first walk: each pass graphs one "layer" of objects and
        # collects the objects they link to for the next pass
        new_list = [self]
        while new_list:
            old_list = new_list
            new_list = []
            for graphable in old_list:
                if not isinstance(graphable, Graphable):
                    raise Exception("A non-graphable object was found in the "
                                    "graph. The object was of type " +
                                    str(type(graphable)) + ", and it appears "
                                    " to be " + str(graphable) + ".")
                if id(graphable) in visited:
                    continue
                visited.add(id(graphable))
                new_list += graphable.do_graph(graph)
        return graph

    def do_graph(self, graph):
        """
        Adds nodes (typically one, but more are allowed) representing this
        Graphable to the specified graph, which should be an instance of Graph,
        and adds all relevant edges linking to other nodes (even if they
        haven't actually been created in the graph yet).

        The return value should then be a list of all other Graphable instances
        to which this one linked and which thus need to have their do_graph
        methods called to add them into the graph.

        Each node's id should be the result of id(object), where object is the
        corresponding Graphable. Thus this graphable should add itself to the
        graph as a node named id(self), and it should link to any other
        Graphables by using id(some_graphable) as the edge target.

        Unless you're writing a subclass of Graphable, you probably won't
        actually need to use this method; instead, you'll most likely use the
        graph method. Subclasses must override this method; it will raise a
        NotImplementedError if they don't.
        """
        raise NotImplementedError(str(type(self)))


class Graph(object):
    """
    A graph. Instances of this class represent a graph of nodes and edges, with
    nodes and edges both being able to have attributes.

    This class is, by default, set up to create directed graphs. You can
    create undirected ones by setting a graph's separator field to "--" and
    that same graph's graph_type field to "graph".

    I wrote my own class instead of using pygraphviz because the underlying
    library that pygraphviz uses doesn't preserve node ordering when writing
    output, which results in ordering="out" not working correctly; Parcon
    depends on ordering="out" to lay out parsers correctly, hence this class
    provided as a replacement.

    This class is also pure-Python, whereas pygraphviz is not.
    """
    def __init__(self):
        self.nodes = {} # map of node names to maps of node attributes
        self.edges = [] # list of edges as (from_name, to_name, attribute_map)
        self.top_node = None # name of the first node added; ranked at the top
        self.separator = "->"
        self.graph_type = "digraph"

    def add_node(self, name, **attributes):
        """
        Adds a node to this graph. Name is the name of the node. Attributes are
        the attributes that should be added, from the set of allowed Graphviz
        node attributes.

        Adding a node that already exists merges the new attributes into the
        existing ones. The first node ever added becomes the graph's top node.
        """
        node_map = self.nodes.get(name)
        if node_map is None:
            node_map = {}
            self.nodes[name] = node_map
        node_map.update(attributes)
        if self.top_node is None:
            self.top_node = name

    def add_edge(self, source, target, **attributes):
        """
        Adds an edge to this graph. Unlike Pygraphviz, adding an edge does not
        create any nodes it depends on; however, an edge can be added before
        its corresponding nodes have been added, so long as they are then added
        before a function such as to_dot_file() is called.

        source is the name of the source node. target is the name of the target
        node. attributes are attributes for this edge.
        """
        self.edges.append((source, target, attributes))

    def __str__(self):
        return self.to_dot_file()

    def to_dot_file(self):
        """
        Formats this graph into a .dot-style file, and returns the would-be
        file contents.
        """
        def format_attributes(attributes):
            return ", ".join(k + '="' + escape_string(v) + '"' for k, v in attributes.items())
        result = []
        result.append(self.graph_type + " g {")
        for node, attributes in self.nodes.items():
            result.append("    " + str(node) + " [" + format_attributes(attributes) + "];")
        for source, target, attributes in self.edges:
            result.append("    " + str(source) + " " + self.separator + " " + str(target) + " [" + format_attributes(attributes) + "];")
        if self.top_node is not None:
            # Pin the first node added to the top rank of the drawing
            result.append('    {rank="min"; ' + str(self.top_node) + "}")
        result.append("}")
        result.append("")
        return "\n".join(result)

    def draw(self, file, format="png", program="dot"):
        """
        Draws this graph into a file.

        file is the name of the file to write to (not a file object). format is
        the format to use; this defaults to "png". program is the program to
        use; this defaults to "dot", and as a result, the dot program must be
        installed to call this with its default arguments.
        """
        p = subprocess.Popen([program, "-T", format, "-o", file], stdin=subprocess.PIPE)
        # The pipe is a binary stream, so the text must be encoded before it is
        # written (writing a str directly raises TypeError on Python 3).
        # communicate() writes the data, closes stdin and waits for the program.
        p.communicate(self.to_dot_file().encode("utf-8"))


# --------------------------------------------------------------------------------
# /parcon/railroad/__init__.py:
# --------------------------------------------------------------------------------

"""
This module provides various classes for specifying what a particular syntax
diagram (a.k.a. railroad diagram) should look like.

The actual drawing of diagrams created with classes in this module is left up
to other modules included with Parcon in order to allow railroad diagrams that
look different to be created. The main one of these is the submodule raildraw,
which is a from-scratch railroad diagram drawing engine that can take a
railroad diagram as specified by classes in this module and convert it to a PNG
image.

Here's a really simple example that uses raildraw to draw a syntax diagram:

from parcon import First
# Create a parser to draw
some_parser = "Hello, " + First("world", "all you people")
# Then draw a diagram of it.
some_parser.draw_railroad_to_png({}, "test.png")
"""

from itertools import chain
from parcon import ordered_dict

# Token types
PRODUCTION = 1
TEXT = 2
ANYCASE = 3
DESCRIPTION = 4


class Component(object):
    """
    Base class of everything that can appear in a railroad diagram.
    """
    def copy(self):
        """
        Returns a deep copy of this component. Subclasses must override this.
        """
        raise NotImplementedError

    def optimize(self):
        """
        Simplifies this component in place. The default does nothing.
        """
        pass

    def __str__(self):
        # Fallback for subclasses that don't define __str__. Without it,
        # __repr__ below would call object.__str__, which calls repr(), which
        # calls __repr__ again, recursing forever.
        return type(self).__name__ + "()"

    def __repr__(self):
        return self.__str__()


class Nothing(Component):
    """
    An empty component: a plain piece of track with nothing on it.
    """
    def copy(self):
        return Nothing()

    def __str__(self):
        return "Nothing()"


class Then(Component):
    """
    A sequence of components, one after the other.
    """
    def __init__(self, *constructs):
        self.constructs = list(constructs)

    def copy(self):
        return Then(*[c.copy() for c in self.constructs])

    def optimize(self):
        """
        Flattens directly nested Then components into this one and drops
        Nothing components, then optimizes each remaining component.
        """
        modified = True
        while modified:
            modified = False
            old_constructs = self.constructs
            new_constructs = list(chain(*[c.constructs if isinstance(c, Then) else [c] for c in old_constructs if not isinstance(c, Nothing)]))
            if old_constructs != new_constructs:
                modified = True
            self.constructs = new_constructs
        for construct in self.constructs:
            construct.optimize()

    def __str__(self):
        return "Then(%s)" % ", ".join([repr(c) for c in self.constructs])


class Or(Component):
    """
    A choice between several components.
    """
    def __init__(self, *constructs):
        self.constructs = list(constructs)

    def copy(self):
        return Or(*[c.copy() for c in self.constructs])

    def optimize(self):
        """
        Flattens directly nested Or components into this one, then optimizes
        each remaining component. Unlike Then, Nothing components are kept,
        since an empty alternative is meaningful in a choice.
        """
        modified = True
        while modified:
            modified = False
            old_constructs = self.constructs
            new_constructs = list(chain(*[c.constructs if isinstance(c, Or) else [c] for c in old_constructs]))
            if old_constructs != new_constructs:
                modified = True
            self.constructs = new_constructs
        for construct in self.constructs:
            construct.optimize()

    def __str__(self):
        return "Or(%s)" % ", ".join([repr(c) for c in self.constructs])


class Token(Component):
    """
    A box containing text. type is one of the constants PRODUCTION, TEXT,
    ANYCASE or DESCRIPTION defined at the top of this module.
    """
    def __init__(self, type, text):
        assert type >= 1 and type <= 4
        self.type = type
        self.text = text

    def copy(self):
        return Token(self.type, self.text)

    def __str__(self):
        return "Token(%s, %s)" % (repr(self.type), repr(self.text))


class Loop(Component):
    """
    A component that can be repeated, with a delimiter component between
    successive repetitions.
    """
    def __init__(self, component, delimiter):
        self.component = component
        self.delimiter = delimiter

    def copy(self):
        return Loop(self.component.copy(), self.delimiter.copy())

    def optimize(self):
        """
        Collapses a loop whose component is itself a loop into a single loop
        whose delimiter is a choice between the two delimiters, then optimizes
        the component and the delimiter.
        """
        if isinstance(self.component, Loop):
            self.component, self.delimiter = self.component.component, Or(self.component.delimiter, self.delimiter)
            # The new component may itself be a loop, so start over
            self.optimize()
            return
        self.component.optimize()
        self.delimiter.optimize()

    def __str__(self):
        return "Loop(%s, %s)" % (repr(self.component), repr(self.delimiter))


class Bullet(Component):
    """
    The marker placed at the start and the end of a diagram.
    """
    def copy(self):
        return Bullet()

    def __str__(self):
        return "Bullet()"


class Railroadable(object):
    """
    A class representing an object that can be drawn as a railroad diagram.
    Most Parcon parsers subclass this class in addition to parcon.Parser.
    """
    # Other Railroadables that this one refers to
    railroad_children = []
    # If not None, this object defines a named production with this name...
    railroad_production_name = None
    # ...and this is the Railroadable whose railroad is the production's body
    railroad_production_delegate = None

    def create_railroad(self, options):
        """
        Returns the railroad Component representing this object. Subclasses
        must override this.
        """
        raise NotImplementedError

    def get_productions(self):
        """
        Returns an OrderedDict mapping the name of every named production
        reachable from this object to the Railroadable that defines it.
        """
        map = ordered_dict.OrderedDict()
        visited = set()
        self._list_productions(map, visited)
        # TODO: in the future, check that each possible result for a given
        # production generates a railroad that means syntactically the same
        # thing. For now, we're just going to use the first one in the list.
        return ordered_dict.OrderedDict([(k, v[0]) for k, v in map.items()])

    def _list_productions(self, map, visited):
        """
        Recursively records, in map, the delegates of every named production
        reachable from this object. map maps production names to lists of
        delegates; visited is the set of objects already processed.
        """
        if self in visited: # Already visited this object
            return
        visited.add(self)
        # Key on railroad_production_name, the attribute this class declares
        # and checks, rather than on a separate "name" attribute that a
        # Railroadable is not guaranteed to have.
        production_name = self.railroad_production_name
        if production_name is not None:
            the_list = map.get(production_name, None)
            if not the_list:
                the_list = []
                map[production_name] = the_list
            if self.railroad_production_delegate not in the_list:
                the_list.append(self.railroad_production_delegate)
        for r in self.railroad_children:
            ensure_railroadable(r)
            r._list_productions(map, visited)

    def draw_railroad_to_png(self, options, filename):
        """
        Draws a syntax diagram for this object to the specified .png image file
        using the specified options. For now, just pass {} (i.e. an empty
        dictionary) as options; I'll document what this actually does at a
        later date.
        """
        # raildraw /has/ to be imported here, not at the top of the module,
        # because it depends on us and circular dependency issues will arise
        # if the import is done at the top of this module
        from parcon.railroad import raildraw as _raildraw
        diagram = Then(Bullet(), self.create_railroad(options), Bullet())
        _raildraw.draw_to_png(diagram, options, filename)
        del _raildraw

    def draw_productions_to_png(self, options, filename, tail=()):
        """
        Draws all named productions reachable from this object to a PNG file.
        See draw_productions_to_image.
        """
        self.draw_productions_to_image('png', options, filename, tail)

    def draw_productions_to_svg(self, options, filename, tail=()):
        """
        Draws all named productions reachable from this object to an SVG file.
        See draw_productions_to_image.
        """
        self.draw_productions_to_image('svg', options, filename, tail)

    def draw_productions_to_image(self, img_type, options, filename, tail):
        """
        Draws all named productions reachable from this object to the specified
        file. img_type is the image type ('png' or 'svg'). tail is a sequence
        of production names that should be drawn last, in the order given.

        Raises an Exception if there are no named productions to draw.
        """
        productions = self.get_productions()
        if len(productions) == 0:
            raise Exception("No named productions to generate")
        # Sort the specified tail productions to the end
        for name in tail:
            if name in productions:
                value = productions[name]
                del productions[name]
                productions[name] = value
        # Imported here for the same reason as in draw_railroad_to_png
        from parcon.railroad import raildraw as _raildraw
        _raildraw.draw_to_image(img_type, ordered_dict.OrderedDict([(k,
            Then(Bullet(), v.create_railroad(options), Bullet()))
            for k, v in productions.items()]), options, filename)
        del _raildraw


def ensure_railroadable(value):
    """
    Raises an Exception if value is not an instance of Railroadable.
    """
    if not isinstance(value, Railroadable):
        raise Exception("Trying to create a railroad diagram for an object of "
                        "class " + str(type(value)) + " but that type is not a "
                        "subclass of Railroadable, so this is not allowed.")


def create_railroad(value, options):
    """
    Checks that value is a Railroadable and returns the result of its
    create_railroad method.
    """
    ensure_railroadable(value)
    return value.create_railroad(options)


# --------------------------------------------------------------------------------
# /parcon/ordered_dict.py:
# --------------------------------------------------------------------------------
# This entire file, with a few modifications, is courtesy of
# http://code.activestate.com/recipes/576693/ and is used so that Parcon can be
# used with Python 2.6, which doesn't have an OrderedDict class. OrderedDict is
# used by some stuff related to syntax diagram generation.

# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy.
# Passes Python2.7's test suite and incorporates all the latest updates.

import six
try:
    from thread import get_ident as _get_ident  # Python 2
except ImportError:
    try:
        # Python 3. This has to be tried before the dummy modules below, which
        # no longer exist in Python 3.9 and later.
        from _thread import get_ident as _get_ident
    except ImportError:
        try:
            from dummy_thread import get_ident as _get_ident
        except ImportError:
            from _dummy_thread import get_ident as _get_ident

try:
    from _abcoll import KeysView, ValuesView, ItemsView  # Python 2
except ImportError:
    try:
        from collections.abc import KeysView, ValuesView, ItemsView  # Python 3
    except ImportError:
        pass


class OrderedDict(dict):
    'Dictionary that remembers insertion order'
    # An inherited dict maps keys to values.
    # The inherited dict provides __getitem__, __len__, __contains__, and get.
    # The remaining methods are order-aware.
    # Big-O running times for all methods are the same as for regular dictionaries.

    # The internal self.__map dictionary maps keys to links in a doubly linked list.
    # The circular doubly linked list starts and ends with a sentinel element.
    # The sentinel element never gets deleted (this simplifies the algorithm).
    # Each link is stored as a list of length three:  [PREV, NEXT, KEY].

    def __init__(self, *args, **kwds):
        '''Initialize an ordered dictionary.  Signature is the same as for
        regular dictionaries, but keyword arguments are not recommended
        because their insertion order is arbitrary.

        '''
        if len(args) > 1:
            raise TypeError('expected at most 1 arguments, got %d' % len(args))
        try:
            self.__root
        except AttributeError:
            self.__root = root = []                     # sentinel node
            root[:] = [root, root, None]
            self.__map = {}
        self.__update(*args, **kwds)

    def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
        'od.__setitem__(i, y) <==> od[i]=y'
        # Setting a new item creates a new link which goes at the end of the linked
        # list, and the inherited dictionary is updated with the new key/value pair.
        if key not in self:
            root = self.__root
            last = root[0]
            last[1] = root[0] = self.__map[key] = [last, root, key]
        dict_setitem(self, key, value)

    def __delitem__(self, key, dict_delitem=dict.__delitem__):
        'od.__delitem__(y) <==> del od[y]'
        # Deleting an existing item uses self.__map to find the link which is
        # then removed by updating the links in the predecessor and successor nodes.
        dict_delitem(self, key)
        link_prev, link_next, key = self.__map.pop(key)
        link_prev[1] = link_next
        link_next[0] = link_prev

    def __iter__(self):
        'od.__iter__() <==> iter(od)'
        root = self.__root
        curr = root[1]
        while curr is not root:
            yield curr[2]
            curr = curr[1]

    def __reversed__(self):
        'od.__reversed__() <==> reversed(od)'
        root = self.__root
        curr = root[0]
        while curr is not root:
            yield curr[2]
            curr = curr[0]

    def clear(self):
        'od.clear() -> None.  Remove all items from od.'
        try:
            for node in six.itervalues(self.__map):
                del node[:]
            root = self.__root
            root[:] = [root, root, None]
            self.__map.clear()
        except AttributeError:
            pass
        dict.clear(self)

    def popitem(self, last=True):
        '''od.popitem() -> (k, v), return and remove a (key, value) pair.
        Pairs are returned in LIFO order if last is true or FIFO order if false.

        '''
        if not self:
            raise KeyError('dictionary is empty')
        root = self.__root
        if last:
            link = root[0]
            link_prev = link[0]
            link_prev[1] = root
            root[0] = link_prev
        else:
            link = root[1]
            link_next = link[1]
            root[1] = link_next
            link_next[0] = root
        key = link[2]
        del self.__map[key]
        value = dict.pop(self, key)
        return key, value

    # -- the following methods do not depend on the internal structure --

    def keys(self):
        'od.keys() -> list of keys in od'
        return list(self)

    def values(self):
        'od.values() -> list of values in od'
        return [self[key] for key in self]

    def items(self):
        'od.items() -> list of (key, value) pairs in od'
        return [(key, self[key]) for key in self]

    def iterkeys(self):
        'od.iterkeys() -> an iterator over the keys in od'
        return iter(self)

    def itervalues(self):
        'od.itervalues -> an iterator over the values in od'
        for k in self:
            yield self[k]

    def iteritems(self):
        'od.iteritems -> an iterator over the (key, value) items in od'
        for k in self:
            yield (k, self[k])

    def update(*args, **kwds): #@NoSelf
        '''od.update(E, **F) -> None.  Update od from dict/iterable E and F.

        If E is a dict instance, does:           for k in E: od[k] = E[k]
        If E has a .keys() method, does:         for k in E.keys(): od[k] = E[k]
        Or if E is an iterable of items, does:   for k, v in E: od[k] = v
        In either case, this is followed by:     for k, v in F.items(): od[k] = v

        '''
        if len(args) > 2:
            raise TypeError('update() takes at most 2 positional '
                            'arguments (%d given)' % (len(args),))
        elif not args:
            raise TypeError('update() takes at least 1 argument (0 given)')
        self = args[0]
        # Make progressively weaker assumptions about "other"
        other = ()
        if len(args) == 2:
            other = args[1]
        if isinstance(other, dict):
            for key in other:
                self[key] = other[key]
        elif hasattr(other, 'keys'):
            for key in other.keys():
                self[key] = other[key]
        else:
            for key, value in other:
                self[key] = value
        for key, value in kwds.items():
            self[key] = value

    __update = update  # let subclasses override update without breaking __init__

    __marker = object()

    def pop(self, key, default=__marker):
        '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value.
        If key is not found, d is returned if given, otherwise KeyError is raised.

        '''
        if key in self:
            result = self[key]
            del self[key]
            return result
        if default is self.__marker:
            raise KeyError(key)
        return default

    def setdefault(self, key, default=None):
        'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
        if key in self:
            return self[key]
        self[key] = default
        return default

    def __repr__(self, _repr_running={}):
        'od.__repr__() <==> repr(od)'
        # _repr_running is deliberately a shared mutable default: it records
        # which (object, thread) pairs are currently being repr'd so that a
        # self-referential dictionary prints '...' instead of recursing forever.
        call_key = id(self), _get_ident()
        if call_key in _repr_running:
            return '...'
        _repr_running[call_key] = 1
        try:
            if not self:
                return '%s()' % (self.__class__.__name__,)
            return '%s(%r)' % (self.__class__.__name__, self.items())
        finally:
            del _repr_running[call_key]

    def __reduce__(self):
        'Return state information for pickling'
        items = [[k, self[k]] for k in self]
        inst_dict = vars(self).copy()
        # Drop the attributes that every OrderedDict has (the linked-list
        # bookkeeping); they are rebuilt when the items are re-inserted
        for k in vars(OrderedDict()):
            inst_dict.pop(k, None)
        if inst_dict:
            return (self.__class__, (items,), inst_dict)
        return self.__class__, (items,)

    def copy(self):
        'od.copy() -> a shallow copy of od'
        return self.__class__(self)

    @classmethod
    def fromkeys(cls, iterable, value=None):
        '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
        and values equal to v (which defaults to None).

        '''
        d = cls()
        for key in iterable:
            d[key] = value
        return d

    def __eq__(self, other):
        '''od.__eq__(y) <==> od==y.  Comparison to another OD is order-sensitive
        while comparison to a regular mapping is order-insensitive.

        '''
        if isinstance(other, OrderedDict):
            return len(self)==len(other) and self.items() == other.items()
        return dict.__eq__(self, other)

    def __ne__(self, other):
        return not self == other

    # -- the following methods are only used in Python 2.7 --

    def viewkeys(self):
        "od.viewkeys() -> a set-like object providing a view on od's keys"
        return KeysView(self)

    def viewvalues(self):
        "od.viewvalues() -> an object providing a view on od's values"
        return ValuesView(self)

    def viewitems(self):
        "od.viewitems() -> a set-like object providing a view on od's items"
        return ItemsView(self)

# --------------------------------------------------------------------------------
"""
A static typing library for Python. That may sound at first as if this module
was designed to simply decorate methods specifying the type of objects that
must be passed to them, and it can definitely do that. However, it's quite a
bit more powerful than that. It has a collection of constructs that allow
constructing type patterns, objects that allow a form of pattern matching
against Python objects. For example, And(Type(list), All(Type(int))) is a type
pattern that matches all objects that are instances of list and whose values
are all ints. All(Type(int)) would match any iterable object, not just a list,
whose values are ints, while Or(Not(Iterable()), All(Type(int))) would
additionally match objects that are not iterable, and Type(int) would simply
match objects of type int.

A short notation can be used to represent some of the type constructs. These
must be passed to the compile function to convert them to type patterns for
actual use. Any Python type is a type pattern matching objects of that type.
A list containing one item, a type pattern (short or otherwise), is a type
pattern matching objects that are iterable and whose values are all of that
type and a tuple containing one or more items is a type pattern that matches
any object that matches at least one of its contained types. In that way,
Python types are converted to instances of Type, lists are converted to
instances of All, and tuples are converted to instances of Or.

Type patterns have two methods, matches and check_matches. Both take a single
argument, the value to match against. matches returns true if the specified
value matches the type pattern on which the matches function was called.
check_matches calls matches and throws a StaticTypeError if it returned false.

Each of the type pattern constructs clearly defines what fields it creates,
which allows for metatyping: creating type patterns that match type patterns
themselves. Such a thing is used in JPath's query optimizer, where the
optimizer uses metatyping to determine if the type pattern that an optimizer
will be called for makes any definitive assertions as to what type of compiler
production it operates on, which allows the compiler to significantly decrease
the time it takes to look up the set of optimizations to be applied to a
particular compiler production.
"""


class StaticTypeError(Exception):
    """
    An exception thrown when an object passed to check_matches does not match
    the specified static type.
    """
    pass


class TypeFormatError(Exception):
    """
    An exception thrown when a static type is malformed. This could happen if,
    for example, the number 5 was passed to the compile function; 5 is
    obviously not a valid static type, so a TypeFormatError would be raised.
    """
    pass


class InternalError(Exception):
    """
    An exception thrown when an internal problem occurs with the static type
    library. This usually indicates a bug in this library.
    """
    pass


class StaticType(object):
    """
    The class that all static types extend from. It has two useful methods:
    matches and check_matches.

    StaticType is not meant to be instantiated directly; construct instances
    of its subclasses instead. Subclasses must override matches and __str__.
    """
    def matches(self, value):
        """
        Checks to see if the specified object matches this static type. If it
        does, True will be returned, and False will be returned if it doesn't.
        Subclasses of StaticType must override this to perform the actual
        matching; StaticType's implementation throws an InternalError.
        """
        raise InternalError("StaticType subclass " + str(type(self)) +
                " doesn't implement the matches function")

    def check_matches(self, value):
        """
        Calls self.matches(value). If the result is false, a StaticTypeError
        is raised. If the result is true, this method simply returns.
        """
        if not self.matches(value):
            raise StaticTypeError("Value " + str(value) + " is not of type " +
                    str(self))

    def __str__(self):
        raise Exception(str(type(self)) + " does not provide __str__")

    def __repr__(self):
        return self.__str__()


class Type(StaticType):
    """
    A static type that checks to make sure values are instances of a
    particular Python type as per Python's built-in isinstance function.

    The type is stored in a field named type.
    """
    def __init__(self, type):
        self.type = type

    def matches(self, value):
        return isinstance(value, self.type)

    def __str__(self):
        return "Type(" + str(self.type) + ")"


class Or(StaticType):
    """
    A static type that matches a value if any of its constructs match that
    particular value. The constructs are stored in a field named constructs.
    """
    def __init__(self, *constructs):
        self.constructs = [compile(c) for c in constructs]

    def matches(self, value):
        return any(c.matches(value) for c in self.constructs)

    def __str__(self):
        return "Or(" + ", ".join(str(c) for c in self.constructs) + ")"


class And(StaticType):
    """
    A static type that matches a value if all of its constructs match that
    particular value. The constructs are stored in a field named constructs.
    """
    def __init__(self, *constructs):
        self.constructs = [compile(c) for c in constructs]

    def matches(self, value):
        return all(c.matches(value) for c in self.constructs)

    def __str__(self):
        return "And(" + ", ".join(str(c) for c in self.constructs) + ")"


class Not(StaticType):
    """
    A static type that matches a value if that particular value does not match
    the construct with which this Not instance was created. The construct is
    stored in a field named construct.
    """
    def __init__(self, construct):
        self.construct = compile(construct)

    def matches(self, value):
        return not self.construct.matches(value)

    def __str__(self):
        return "Not(" + str(self.construct) + ")"


class All(StaticType):
    """
    A static type that matches a value if that particular value is iterable
    and all of its values match the component type with which this All
    instance was created. The type is stored in a field named component_type.
    """
    def __init__(self, component_type):
        self.component_type = compile(component_type)

    def matches(self, value):
        try:
            iterator = iter(value)
        except TypeError:  # Not an iterable type
            return False
        return all(self.component_type.matches(item) for item in iterator)

    def __str__(self):
        return "All(" + str(self.component_type) + ")"


class Any(StaticType):
    """
    A static type that matches a value if that particular value is iterable
    and any of its values match the component type with which this Any
    instance was created. The type is stored in a field named component_type.
    """
    def __init__(self, component_type):
        self.component_type = compile(component_type)

    def matches(self, value):
        try:
            iterator = iter(value)
        except TypeError:  # Not an iterable type
            return False
        return any(self.component_type.matches(item) for item in iterator)

    def __str__(self):
        return "Any(" + str(self.component_type) + ")"


class Field(StaticType):
    """
    A static type that matches a value if that particular value has all of the
    fields named when constructing this Field instance and they all match the
    type specified when constructing this Field instance. The field type is
    stored in a field named field_type and the field names are stored in a
    field named field_names.
    """
    def __init__(self, field_type, *field_names):
        self.field_type = compile(field_type)
        self.field_names = list(field_names)

    def matches(self, value):
        for name in self.field_names:
            # Only the attribute lookup is guarded; an AttributeError raised
            # while matching the field's value is a real error and must not
            # be silently reported as "does not match".
            try:
                field_value = getattr(value, name)
            except AttributeError:  # No such attribute, so no match
                return False
            if not self.field_type.matches(field_value):
                return False
        return True

    def __str__(self):
        return "Field(" + ", ".join([str(self.field_type)] + list(self.field_names)) + ")"


class Iterable(StaticType):
    """
    A static type that matches a value if the value is iterable. A value is
    iterable if calling the Python function iter(value) does not raise a
    TypeError.
    """
    def matches(self, value):
        try:
            iter(value)
        except TypeError:
            return False
        return True

    def __str__(self):
        return "Iterable()"


class Sequence(StaticType):
    """
    A static type that matches a value if the value is a sequence. A value is
    defined to be a sequence if calling len(value) does not raise a TypeError.
    """
    def matches(self, value):
        try:
            len(value)
        except TypeError:
            return False
        return True

    def __str__(self):
        return "Sequence()"


class Positional(StaticType):
    """
    A static type that matches a value if the value is a sequence, it has
    exactly the same number of values as were passed to the Positional
    instance when it was created, and each item matches the corresponding
    static type passed to the Positional instance when it was created. For
    example, Positional(int, str, bool) would match a sequence of length 3
    containing an integer, a string, and a boolean, at each respective
    position in the sequence.

    The compiled types are stored in a field named types.
    """
    def __init__(self, *types):
        self.types = [compile(t) for t in types]

    def matches(self, value):
        # A value without a length is not a sequence, so it simply does not
        # match; it must not blow up with a TypeError.
        try:
            length = len(value)
        except TypeError:
            return False
        if length != len(self.types):
            return False
        return all(t.matches(v) for t, v in zip(self.types, value))

    def __str__(self):
        return "Positional(%s)" % ", ".join(str(t) for t in self.types)


class Is(StaticType):
    """
    A static type that matches a value if the value is equal, as determined by
    the == operator, to a specified value. The value is stored in a field
    named value.
    """
    def __init__(self, value):
        self.value = value

    def matches(self, value):
        return self.value == value

    def __str__(self):
        # Without this, StaticType.__str__ raises, which in turn makes
        # check_matches raise a bare Exception instead of StaticTypeError.
        return "Is(" + repr(self.value) + ")"


class Everything(StaticType):
    """
    A static type that matches all values.
    """
    def matches(self, value):
        return True

    def __str__(self):
        return "Everything()"


def compile(short_type):
    """
    Compiles the specified static type. This involves converting Python classes
    to instances of Type, tuples to instances of Or, and lists to instances of
    All. Instances of one of StaticType's subclasses are returned as-is, so
    this function doesn't need to be called on them.

    A list must contain exactly one item; anything that is not a StaticType,
    list, tuple or Python class raises a TypeFormatError.

    This function is essentially analogous to Parcon and Pargen's promote
    functions.
    """
    if isinstance(short_type, StaticType):  # Already compiled
        return short_type
    if isinstance(short_type, list):
        if len(short_type) != 1:
            raise TypeFormatError("Lists in types must be of length 1, but "
                    + str(short_type) + " has length " + str(len(short_type)))
        return All(short_type[0])
    if isinstance(short_type, tuple):
        return Or(*short_type)
    if not isinstance(short_type, type):
        raise TypeFormatError("Type " + str(short_type) + " is not an "
                "instance of StaticType (or one of its subclasses) or a "
                "Python class or a list or a tuple.")
    return Type(short_type)


def matches(value, type):
    """
    Short for compile(type).matches(value).
    """
    return compile(type).matches(value)


def check_matches(value, type):
    """
    Short for compile(type).check_matches(value).
    """
    compile(type).check_matches(value)
335 | """ 336 | if isinstance(short_type, StaticType): # Already compiled 337 | return short_type; 338 | if isinstance(short_type, list): 339 | if len(short_type) != 1: 340 | raise TypeFormatError("Lists in types must be of length 1, but " 341 | + str(short_type) + " has length " + str(len(short_type))) 342 | component_type = short_type[0] 343 | return All(component_type) 344 | if isinstance(short_type, tuple): 345 | return Or(*short_type) 346 | if not isinstance(short_type, type): 347 | raise TypeFormatError("Type " + str(short_type) + " is not an " 348 | "instance of StaticType (or one of its subclasses) or a " 349 | "Python class or a list or a tuple.") 350 | return Type(short_type) 351 | 352 | 353 | def matches(value, type): 354 | """ 355 | Short for compile(type).matches(value). 356 | """ 357 | return compile(type).matches(value) 358 | 359 | 360 | def check_matches(value, type): 361 | """ 362 | Short for compile(type).check_matches(value). 363 | """ 364 | compile(type).check_matches(value) -------------------------------------------------------------------------------- /technotes/railroad.txt: -------------------------------------------------------------------------------- 1 | Parcon currently has support for visualizing parsers by creating a Graphviz diagram. This is great for people writing Parcon grammars, but it's not very helpful to people trying to write text in whatever language a particular Parcon grammar parses. So I'm thinking of adding support to Parcon for drawing railroad diagrams. 2 | 3 | So... I'm thinking I'd probably just use the SQLite script for now, and require Tcl/Tk to be installed in order for railroad generation to work. At some later point I'd probably port that script to Python to allow it to be used without Tcl/Tk needing to be installed. 4 | 5 | So, what should each parser translate into, in the railroad diagram? 
6 | 7 | Here's some of the ones I've thought so far: 8 | 9 | Optional: a split into two lines, one of which just goes straight from one side to the other, and the other line contains whatever the Optional wraps. 10 | 11 | OneOrMore: a loop back from the end of whatever it wraps back to the beginning. 12 | 13 | ZeroOrMore: displayed the same as Optional(OneOrMore(...)). 14 | 15 | First: displayed as a branch for each of the possible parsers. 16 | 17 | Longest: probably displayed the same as First for now; need to think of how to differentiate these two. 18 | 19 | Then: displayed as one parser on a line followed by the next parser on the same line. 20 | 21 | InfixExpr: the component surrounded by a loop back containing one branch for each operator. I may change this to have InfixExprs nested in each others' components flatten out if I can make sure that this will still always be correct. 22 | 23 | Those are the main ones that I can think of right now. Most of the others, such as Translate, would simply pass through their contained parser unmodified; some, however, like Present, And, and Not, will throw exceptions until I figure out how those should be represented in a railroad diagram. 24 | 25 | Of course, there will be parser classes that function simply as wrappers around other parser classes that help with creating railroad diagrams. I'm thinking that there will be two for now: RRName and RRDesc. 26 | 27 | RRName is a wrapper around a parser that specifies a production name for that parser; instead of displaying that parser in a railroad diagram generated from a parser containing that one, a box with the specified name will be used instead. This is useful for splitting out the grammar into logical productions. 28 | 29 | RRDesc is similar to RRName, but it will be displayed differently so that it will appear as a terminal with a specified description. 
This is good when there's simply no good way to represent the parser as a railroad diagram (and it most likely contains parsers that throw exceptions if such a thing were to be tried), and so a description would be better. 30 | 31 | Railroad generation won't descend into the underlying parser of an RRName or an RRDesc, instead showing its name or its description, respectively, in place. This means that parsers that would otherwise throw an exception on railroad generation can be wrapped with RRName or RRDesc and then used in other, more complicated grammars, and railroad diagrams can be generated for everything except those wrapped parsers that contain exception-throwing railroad generating parsers. 32 | 33 | I'm tempted to have __call__, when passed a keyword argument named "name" or "desc" (or "description" for the latter), create and return a wrapping RRName or RRDesc, respectively, for that parser and name/description. I'll need to think about that a bit more. 34 | 35 | 36 | 37 | So, the more I think about this, the more I'm thinking that writing a railroad renderer from scratch would be awesome. But difficult. So I'm thinking there should be some intermediate form that all parsers know about, that's a sort of language for describing railroad diagrams, or rather, a set of Python classes for describing railroad diagrams. 38 | 39 | Parsers would know how to generate things using these classes, and that's it. 40 | 41 | Then there could be some converters to go from this representation to an actual graphical version in some form. I'm thinking that the first one will probably use SQLite's Tcl/Tk generation script, with a few minor changes. A second version will probably be one that I write myself in Python, probably based on the Tcl/Tk script. 42 | 43 | So, what classes are needed? Let's see... 44 | 45 | Token: A class that represents a single element in a railroad diagram. It has two fields (and two constructor parameters): type and text. 
Type is one of four constants: PRODUCTION, TEXT, ANYCASE, or DESCRIPTION. I haven't yet worked out the specifics of how each of these will be displayed. 46 | 47 | Or: A class that represents a list of two or more components in a railroad diagram. They will be displayed as a branch containing each item in the order it was present from top to bottom. 48 | 49 | Nothing: A class that represents nothing, I.E. a straight line in the railroad diagram. Its purpose is to allow optional components of the grammar to be created: Or(Nothing(), some_other_component) would be a good representation of parcon.Optional(parser_corresponding_to_some_other_component). 50 | 51 | Then: A class representing several components in a line, each followed by the next. This is typically drawn as each component on a line. 52 | 53 | Loop: A loop back on a particular component. A loop has two components: the component (to borrow InfixExpr's terminology) and the delimiter. Either one can be Nothing. A railroad generator should make sure that it reverses the order of components present in Then instances in the delimiter of a loop since it will be followed from right to left. (I'm planning on writing a generator that uses SQLite's engine; it already does this, so the generator for it won't need to actually take this into account.) 54 | 55 | I think that's just about it. 56 | 57 | ------------------------------------------------------------------------------- 58 | 59 | So now we get on to how we're actually going to draw these. I'm going to use 60 | Cairo to do the actual drawing, but what, exactly, do we do to draw a railroad 61 | diagram? 62 | 63 | Well... 64 | 65 | We need functions for computing the size of any given construct, in pixels. So 66 | we have one function for each construct to give the width and height of the 67 | construct, in pixels. 68 | 69 | It should also specify where, along the y axis, the line should enter and exit 70 | that particular construct. 
So our size-generating method returns a 3-tuple: 71 | (width, height, line_position). 72 | 73 | Coordinates, by the way, start at 0 in the upper-left corner. Positive 74 | X is to the right, and positive Y is down. This mirrors how normal windowing 75 | coordinates work. 76 | 77 | Then we have functions for each construct to draw it forwards, and functions 78 | for each construct to draw it backwards. The reason for the backwards drawing 79 | is that the south end of a loop, the end that goes back toward the beginning, 80 | should be drawn backwards. Of course, if a loop is being drawn backwards 81 | because it's in the tail end of another loop, then its own tail end should be 82 | drawn forwards, thereby reversing the direction again. 83 | 84 | We give these constructs the X and Y position at which to draw themselves, when 85 | we instruct them to, and they do it. (And obviously we give them a Cairo 86 | context on which to draw themselves.) 87 | 88 | Then when we want to draw a particular construct into an image, we ask it to 89 | compute its size, then create a cairo.ImageSurface of that size and tell it to 90 | draw itself onto that image at position 0, 0. (I'm not sure what coordinates 91 | Cairo uses by default, so a transform may need to be added to convert these 92 | coordinates that I've laid out into Cairo coordinates. If I remember correctly, 93 | Cairo has a facility for doing that, so the functions for drawing constructs 94 | won't need to know about this or transform their coordinates in any way.) 95 | 96 | So this makes sense so far. Now let's see about each of the constructs and how 97 | they'll compute their sizes and stuff... 98 | 99 | (Oh, and there's a dict of options passed recursively to each of these methods 100 | that can be used to specify drawing options.) 101 | 102 | So let's see... 103 | 104 | Token. 105 | 106 | When asked to compute its size, it computes the size of the text it contains 107 | with a certain font. 
This font would be specified in the options, and a default 108 | would be used if one was not specified. It then adds the thickness of the line 109 | that's supposed to surround it (probably an option) twice to the expected 110 | height to account for the line at the top and the line at the bottom, and then 111 | some padding to pad the text. If it's supposed to be drawn as a square (that 112 | is, if it's of type PRODUCTION or DESCRIPTION), then it adds that same amount to 113 | the sides for the lines surrounding it. Otherwise, it adds that same amount 114 | plus the height of the text to the width (since the semicircles will end up 115 | being the same size as the text, so that accounts for them). The line height is 116 | half the overall height to center the line in the text. 117 | 118 | When asked to draw itself, it draws the text right in the middle of its 119 | computed size. It then draws a box just within its computed size if it's a 120 | box one, or two semicircles just within its size to the left and the right, and 121 | lines connecting them, if it's a circle one. 122 | 123 | I'm also thinking that the space between the text and the outer line of the 124 | circle/square, and the spacing between that line and the width/height of the 125 | Token, and perhaps the thickness of the line, should be options. 126 | 127 | Nothing. 128 | 129 | This one's easy. Its height and width are that of an arrow symbol. To draw 130 | itself, it simply draws an arrow pointing right for forwards drawing or left 131 | for backwards drawing. 132 | 133 | Then. 134 | 135 | A little bit more tricky, but not impossible by any means. The width is the 136 | width of all of its constructs added together, plus the width of one arrow-line 137 | for each construct except the last one. Height is a bit trickier. 
What we do is 138 | iterate over the sizes of all of the constructs and create an above height, 139 | which is the amount of that construct above its line (and this will therefore 140 | be equal to the line_position for that construct) and a below height, which is 141 | the amount of that construct below its line (and this will therefore be equal 142 | to the overall height of the construct minus its line height). The height of 143 | the Then is then the maximum above height out of these constructs plus the 144 | maximum below height out of these constructs. The returned line height is equal 145 | to the maximum above height. 146 | 147 | To draw a Then, we create a variable holding the current X position, which we 148 | initialize to 0. We then compute the size of this Then as above, and store the 149 | line height. Then we start with the first construct and draw it at our current 150 | X position, which is 0. Its Y position is the overall line height minus this 151 | construct's line height. Once we've drawn it, we add its width to the current 152 | position variable. Then, if this is not the last construct, we draw an 153 | arrow-line at the current position, and add the width of an arrow-line to the 154 | current X position. 155 | 156 | To draw a Then backwards, we do the same thing but in reverse: we initialize 157 | our current X position to be the computed width of this Then, and subtract each 158 | construct's width from it as we go. When we draw each construct, though, we subtract 159 | its width from the current X position so that the point we get is where we want 160 | the upper-left corner of the construct to be. We also draw the arrow-lines 161 | pointing left instead of right. 162 | 163 | Or. 164 | 165 | A little bit more tricky than Then, primarily because of all the curves. 
The 166 | width is the width of its widest component, plus the diameter to use when 167 | drawing the ascenders and descenders, plus the width of two arrow heads: one 168 | before each construct in the Or and one after. 169 | 170 | The height is the height of all the components added together with a certain 171 | amount of spacing (probably configurable via options) added between. For now 172 | I'll just assume that there's sufficient space to draw the ascenders and 173 | descenders; if there isn't, it'll be up to the user to shrink the diameter of 174 | the ascenders and descenders accordingly. The default diameter will be a value 175 | that will render correctly. 176 | 177 | The line position is the same as the line position of the first construct. 178 | 179 | So, to draw an Or... We draw all the components, top to bottom, with the 180 | appropriate spacing in between; this is done much the same as drawing Then 181 | instances, but top-to-bottom instead of left-to-right. The X at which the 182 | components are drawn is the diameter of the descenders, plus the width of one 183 | arrowhead. 184 | 185 | Then we draw a line at the line height of the first item, running 186 | from the left side to the first item itself, with an arrowhead at the end. We 187 | then draw a similar line on the right side, but this one without an arrowhead. 188 | 189 | Then we draw a curve at the left starting on the left side at the line height 190 | and going so that it points directly down. We then draw a line from there down 191 | to (total_height_of_this_Or_construct - height_of_the_last_construct + 192 | line_position_of_the_last_construct) - (diameter_of_descenders / 2). The effect 193 | of this is that the line ends one descender radius above where the line is 194 | supposed to enter the last construct. 195 | 196 | We then draw a similar curve at the right side, starting at the far right at 197 | the line height and curving down. 
We draw a similar line from there down to the 198 | same Y position as the line we just drew on the other side. 199 | 200 | Then we start with the second construct in the Or, and continue down through 201 | each construct. For each of them, we work out the line position of the... hm... 202 | I sort of stopped writing a few days ago at that ellipsis, and never finished, 203 | but I just went ahead and implemented Or anyway... so yeah, read the source of 204 | parcon.railroad.raildraw for the rest. 205 | 206 | Loop, however, I haven't implemented yet, and I need to write that out in 207 | english to make my brain make sense of it. So... 208 | 209 | Loop. 210 | 211 | I imagine this will be quite similar to Or. This is essentially really the same 212 | as an Or with two alternatives, the angle of the descenders at the top changed, 213 | and with the second alternative reversed in direction. 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | -------------------------------------------------------------------------------- /parcon/pargen/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pargen is a formatter combinator library. It's much the opposite of parcon: 3 | while parcon parses text into objects, pargen formats objects into text. 
4 | I'll get more documentation up on here soon, but for now, here's a JSON 5 | formatter (essentially a simplified reimplementation of Python's json.dumps): 6 | 7 | >>> from parcon.pargen import * 8 | >>> from decimal import Decimal 9 | >>> json = Forward() 10 | >>> number = Type(float, int, long, Decimal) & String() 11 | >>> boolean = Type(bool) & ((Is(True) & "true") | (Is(False) & "false")) 12 | >>> null = Is(None) & "null" 13 | >>> string = Type(str) & '"' + String() + '"' 14 | >>> json_list = Type(list, tuple) & ("[" + ForEach(json, ", ") + "]") 15 | >>> json_map =Type(dict) & ("{" + ForEach(Head(json) + ": " + Tail(json), ", ") + "}") 16 | >>> json << (boolean | number | null | string | json_list | json_map) 17 | 18 | You can then do things like: 19 | 20 | >>> json.format([True,1,{"2":3,"4":None},5,None,False,"hello"]).text 21 | '[true, 1, {"2": 3, "4": null}, 5, null, false, "hello"]' 22 | 23 | You'll probably want to take a look at the Formatter class. It's the "main" 24 | class for pargen, analogous to parcon.Parser. It contains some module 25 | documentation that can probably help to get you started. 26 | """ 27 | 28 | from __future__ import print_function 29 | """ 30 | TODO: write something that can convert a number into its textual representation, 31 | so like the opposite of the number parser example, and then write something that 32 | can format a list of items into, say, "first", "first or second", "first, second, or third", etc, meaning 33 | that it converts a list of items into an english language phrase describing that set properly. Also add 34 | things for extracting keys from maps. 35 | """ 36 | 37 | from parcon import static 38 | import six 39 | 40 | sequence_type = static.Sequence() 41 | sequence_or_dict_type = static.Or(static.Sequence(), static.Type(dict)) 42 | 43 | class Result(object): 44 | """ 45 | A formatter result. Instances of this class are returned from 46 | Formatter.format. 47 | 48 | Two fields are present: text and remainder. 
class Result(object):
    """
    A formatter result. Instances of this class are returned from
    Formatter.format.

    Two fields are present: text and remainder. If this result represents
    failure of a formatter, text will be None and remainder will be unspecified.
    If this result represents success, text will be the text produced by the
    formatter, and remainder will be the portion of the input object that the
    formatter did not consume.

    Result objects have a boolean truth value corresponding to whether or not
    they succeeded. For example, this could be used to print whether some
    particular result succeeded:

    if some_result:
        print("Result succeeded")
    else:
        print("Result failed")
    """
    def __init__(self, text, remainder):
        self.text = text
        self.remainder = remainder

    def __nonzero__(self):
        # Python 2 truth protocol; success is "text is not None", so an empty
        # string still counts as a successful result.
        return self.text is not None

    __bool__ = __nonzero__  # Python 3 spelling of the same protocol

    def __repr__(self):
        # The format string must contain one placeholder per argument; an
        # empty format string raises TypeError on every successful result.
        if self:
            return "<Result: %s with remainder %s>" % (repr(self.text),
                    self.remainder)
        return "<Result: Failure>"

    def __eq__(self, other):
        # Let Python fall back to its default comparison for foreign objects
        # instead of failing on a missing .text attribute.
        if not isinstance(other, Result):
            return NotImplemented
        return self.text == other.text and self.remainder == other.remainder

    def __ne__(self, other):
        # Needed on Python 2, where != is not derived from __eq__.
        return not self == other


def failure():
    """
    Method called by formatters to create a new Result object indicating
    failure. Formatters typically fail when their input was not in the format
    that they expected it to be, or for other reasons. Head, for example, fails
    if the provided value is not a sequence, or if the sequence provided is
    empty.
    """
    return Result(None, None)


def match(text, remainder):
    """
    Method called by formatters to create a new Result object indicating
    success. text is the text that the formatter produced; this can be the
    empty string, but it must be a string of some sort. remainder is the
    portion of the input value that the formatter did not consume; formatters
    such as Then, for example, pass the remainder of their first formatter as
    the value to their second formatter.
    """
    return Result(text, remainder)


def promote(value):
    """
    Converts a short-hand value into a formatter where possible. Formatter
    instances are returned as-is, strings (six.string_types) are wrapped in a
    Literal, and any other value is returned unchanged.
    """
    if isinstance(value, Formatter):
        return value
    if isinstance(value, six.string_types):
        return Literal(value)
    return value


def reversed(function):
    """
    Returns a two-argument function that calls the specified function with its
    arguments swapped. Used to derive the reflected operators (__radd__ and
    friends) from the forward ones.
    """
    def new_function(x, y):
        return function(y, x)
    return new_function


def op_add(first, second):
    """
    Implementation of the + operator: Then(first, second), with both operands
    promoted.
    """
    return Then(promote(first), promote(second))


def op_and(first, second):
    """
    Implementation of the & operator: And(first, second), with both operands
    promoted.
    """
    return And(promote(first), promote(second))


def op_or(first, second):
    """
    Implementation of the | operator: First(first, second), with both operands
    promoted.
    """
    return First(promote(first), promote(second))


class Formatter(object):
    """
    The main class of this module, analogous to parcon.Parser, but for
    formatters.

    Like parcon.Parser, instances of Formatter should not be directly
    constructed; instances of its various subclasses should be created and used
    instead.

    The main method that you'll want to look at is format.
    """
    def __repr__(self, content=''):
        # Subclasses pass a description of their own state as content.
        return '{}({})'.format(type(self).__name__, content)

    def format(self, input):
        """
        Formats a specified input object into a piece of text. Subclasses of
        Formatter should override this and provide an actual implementation.

        The return value of this method is a Result object created by calling
        either the match function or the failure function. The former function
        is used to indicate success; the latter is used to indicate that the
        formatter failed for some reason, such as the input not being of an
        appropriate type.
        """
        raise Exception("format not implemented for " + str(type(self)))

    __add__ = op_add
    __and__ = op_and
    __or__ = op_or
    __radd__ = reversed(op_add)
    __rand__ = reversed(op_and)
    __ror__ = reversed(op_or)


class Literal(Formatter):
    """
    A formatter that outputs a specified piece of literal text. It doesn't
    consume any of the input.
    """
    def __init__(self, text):
        self.text = text

    def __repr__(self):
        return super(Literal, self).__repr__(repr(self.text))

    def format(self, input):
        # The whole input is handed back as the remainder.
        return match(self.text, input)
155 | """ 156 | raise Exception("format not implemented for " + str(type(self))) 157 | 158 | __add__ = op_add 159 | __and__ = op_and 160 | __or__ = op_or 161 | __radd__ = reversed(op_add) 162 | __rand__ = reversed(op_and) 163 | __ror__ = reversed(op_or) 164 | 165 | 166 | class Literal(Formatter): 167 | """ 168 | A formatter that outputs a specified piece of literal text. It doesn't 169 | consume any of the input. 170 | """ 171 | def __init__(self, text): 172 | self.text = text 173 | 174 | def __repr__(self): 175 | return super(Literal, self).__repr__(repr(self.text)) 176 | 177 | def format(self, input): 178 | return match(self.text, input) 179 | 180 | 181 | class ForEach(Formatter): 182 | """ 183 | A formatter that expects a sequence or dict as input. If the input is a 184 | dict, its items() method will be called, and the resulting list used as the 185 | input sequence. For each item in the input sequence, ForEach calls the 186 | specified formatter, passing in that item. The results of all of these 187 | formatters are then concatenated into a single string, separated by the 188 | specified delimiter string (which defaults to the empty string). This 189 | string is then returned. ForEach consumes all of the input so that the 190 | remainder is the empty list. 
191 | """ 192 | def __init__(self, formatter, delimiter=""): 193 | self.formatter = formatter 194 | self.delimiter = delimiter 195 | 196 | def __repr__(self): 197 | content = repr(self.formatter) 198 | if self.delimiter: 199 | content += ', ' + repr(self.delimiter) 200 | return super(ForEach, self).__repr__(content) 201 | 202 | def format(self, input): 203 | if not sequence_or_dict_type.matches(input): 204 | return failure() 205 | results = [] 206 | if isinstance(input, dict): 207 | items = input.items() 208 | else: 209 | items = input 210 | for item in items: 211 | result = self.formatter.format(item) 212 | if not result: 213 | # TODO: what should ForEach do when its formatter fails on a 214 | # particular item? At this point I'm just having it fail out, 215 | # but this needs to be thought out to see if that's really the 216 | # best behavior. 217 | return failure() 218 | results.append(result.text) 219 | return match(self.delimiter.join(results), []) # TODO: should this result in an 220 | # empty list, or should it result in None instead? 221 | 222 | 223 | class String(Formatter): 224 | """ 225 | A formatter that formats whatever data it's provided as input using 226 | Python's str() function. This is typically the formatter that you'll use 227 | to format numbers and other things like that. The remainder is always None. 228 | """ 229 | def format(self, input): 230 | return match(str(input), None) 231 | 232 | 233 | class Repr(Formatter): 234 | """ 235 | Same to String(), but this formatter uses repr() instead of str() to do the 236 | actual formatting. 237 | """ 238 | def format(self, input): 239 | return match(repr(input), None) 240 | 241 | 242 | class _ListExtremity(Formatter): 243 | """ 244 | An abstract formatter that makes dealing with items at either end of a 245 | sequence easier to work with. You shouldn't use this formatter; instead, 246 | use one of its four subclasses, Head, Tail, Front, and Back. 
247 | """ 248 | def __init__(self, formatter): 249 | self.formatter = formatter 250 | 251 | def __repr__(self): 252 | return super(_ListExtremity, self).__repr__(repr(self.formatter)) 253 | 254 | def format(self, input): 255 | if not sequence_type.matches(input): 256 | return failure() 257 | elif len(input) < 1: 258 | return failure() 259 | else: 260 | value = self._value_function(input) 261 | remainder = self._remainder_function(input) 262 | result = self.formatter.format(value) 263 | if not result: 264 | return failure() 265 | return match(result.text, remainder) 266 | 267 | 268 | class Head(_ListExtremity): 269 | """ 270 | A formatter meant to be used on lists. It's constructed with another 271 | formatter. When it's called, it expects some sort of sequence; it fails if 272 | the value provided to it is not a sequence, or if it's an empty sequence. 273 | If there's at least one item, this formatter calls its underlying formatter 274 | with the first item in the sequence. It returns whatever this formatter 275 | returns, with the remainder being all of the list items except for the 276 | first. In this way, repeated invocations of Head remove items from the 277 | front of the list, so, for example, the formatter: 278 | 279 | >>> first_three = Head(String()) + Head(String()) + Head(String()) 280 | >>> first_three.format("12345").text 281 | '123' 282 | """ 283 | _value_function = lambda self, x: x[0] 284 | _remainder_function = lambda self, x: x[1:] 285 | 286 | 287 | class Tail(_ListExtremity): 288 | """ 289 | Same as Head, but this operates on and removes the last item in the list 290 | instead of the first item. 291 | """ 292 | _value_function = lambda self, x: x[-1] 293 | _remainder_function = lambda self, x: x[:-1] 294 | 295 | 296 | class Front(_ListExtremity): 297 | """ 298 | Same as Head, but the remainder of this parser is exactly the value passed 299 | to it, I.E. it doesn't consume any input. 
Thus the formatter: 300 | 301 | >>> first_three_times = Front(String()) + Front(String()) + Front(String()) 302 | >>> first_three_times.format("12345").text 303 | '111' 304 | """ 305 | _value_function = lambda self, x: x[0] 306 | _remainder_function = lambda self, x: x 307 | 308 | 309 | class Back(_ListExtremity): 310 | """ 311 | Same as Front, but this operates on the last item in the list instead of 312 | the first item. 313 | """ 314 | _value_function = lambda self, x: x[-1] 315 | _remainder_function = lambda self, x: x 316 | 317 | 318 | class Type(Formatter): 319 | """ 320 | A formatter that produces the empty string and consumes no input. However, 321 | it only succeeds if the value passed to it matches at least one of the 322 | specified static types. Each of those types can be a Python class or a 323 | static type as defined by parcon.static. 324 | """ 325 | def __init__(self, *static_types): 326 | self.static_type = static.Or(static_types) 327 | 328 | def __repr__(self): 329 | return super(Type, self).__repr__(str(self.static_type)) 330 | 331 | def format(self, input): 332 | if not self.static_type.matches(input): 333 | return failure() 334 | return match("", input) 335 | 336 | 337 | class And(Formatter): 338 | """ 339 | A formatter that acts like its second formatter, except that its first 340 | formatter must succeed in order for the second formatter to be considered. 341 | What the first formatter consumes will be ignored; the second formatter 342 | will be provided with the exact value that was passed into the And instance. 343 | 344 | This could be used with Type, for example, to make a certain formatter only 345 | succeed if its input is of a specific type; for example: 346 | 347 | >>> int_formatter = Type(int, long) & String() 348 | 349 | would be a formatter that formats ints and longs as per the String 350 | formatter but that fails if any other type is passed to it. 
351 | """ 352 | def __init__(self, first, second): 353 | self.first = first 354 | self.second = second 355 | 356 | def __repr__(self): 357 | return super(And, self).__repr__( 358 | '{}, {}'.format(self.first, self.second)) 359 | 360 | def format(self, input): 361 | first_result = self.first.format(input) 362 | if not first_result: 363 | return failure() 364 | return self.second.format(input) 365 | 366 | 367 | class Then(Formatter): 368 | """ 369 | A formatter that applies two formatters one after the other, concatenating 370 | their results and returning them. The remainder of the first formatter will 371 | be passed to the second formatter as its value, and the remainder of Then 372 | will be the remainder of the second formatter. 373 | 374 | If either formatter fails, Then also fails. 375 | """ 376 | def __init__(self, first, second): 377 | self.first = first 378 | self.second = second 379 | 380 | def __repr__(self): 381 | return super(Then, self).__repr__('{}, {}'.format(self.first, self.second)) 382 | 383 | def format(self, input): 384 | first_result = self.first.format(input) 385 | if not first_result: 386 | return failure() 387 | second_result = self.second.format(first_result.remainder) 388 | if not second_result: 389 | return failure() 390 | return match(first_result.text + second_result.text, second_result.remainder) 391 | 392 | 393 | class First(Formatter): 394 | """ 395 | A formatter that attempts to apply all of its formatters in sequence to the 396 | value provided to First; First then acts exactly like the first of its 397 | formatters to succeed. Each formatter is passed a fresh copy of the value 398 | provided to First, without regard to what the formatter applied before it 399 | may have consumed. 400 | 401 | If none of the formatters match, First fails. 
402 | """ 403 | def __init__(self, *formatters): 404 | self.formatters = formatters 405 | 406 | def __repr__(self): 407 | return super(First, self).__repr__(', '.join(map( 408 | repr, self.formatters))) 409 | 410 | def format(self, input): 411 | for formatter in self.formatters: 412 | result = formatter.format(input) 413 | if result: 414 | return match(result.text, result.remainder) 415 | return failure() 416 | 417 | 418 | class Forward(Formatter): 419 | """ 420 | A forward-declared formatter. This allows for mutually-recursive 421 | formatters; the actual underlying formatter that a particular Forward 422 | delegates to can be set later on after the Forward is created. 423 | 424 | A formatter can be set into this Forward by doing: 425 | 426 | some_forward_formatter << formatter_to_delegate_to 427 | 428 | or: 429 | 430 | some_forward_formatter.set(formatter_to_delegate_to) 431 | 432 | It's important to remember that << is not the lowest precedence of all 433 | operators; you'll probably want to wrap the right-hand side in parentheses 434 | in order to avoid precedence issues that might otherwise occur. 435 | """ 436 | def __init__(self, formatter=None): 437 | self.formatter = formatter 438 | 439 | def format(self, input): 440 | if self.formatter is None: 441 | raise Exception("Forward has not yet had a formatter set into it") 442 | return self.formatter.format(input) 443 | 444 | def set(self, formatter): 445 | self.formatter = formatter 446 | 447 | __lshift__ = set 448 | 449 | 450 | class _Cmp(Formatter): 451 | """ 452 | Base class for formatters that compare their input to a known value. 453 | 454 | Subclasses must implement a _cmp method that returns a boolean with the 455 | result of the comparison. 
456 | """ 457 | def __init__(self, value): 458 | self.value = value 459 | 460 | def __repr__(self): 461 | return super(_Cmp, self).__repr__(repr(self.value)) 462 | 463 | def format(self, input): 464 | if self._cmp(input, self.value): 465 | return match("", input) 466 | return failure() 467 | 468 | 469 | class Is(_Cmp): 470 | """ 471 | A formatter that consumes no input and returns the empty string. However, 472 | it only succeeds if its input is equal, as per the == operator, to a value 473 | provided to the Is instance when it's constructed. 474 | """ 475 | @staticmethod 476 | def _cmp(a, b): 477 | return a == b 478 | 479 | 480 | class IsExactly(_Cmp): 481 | """ 482 | Same as Is, but IsExactly uses Python's is operator instead of Python's == 483 | operator to perform the equality check. This should be used for True, 484 | False, None, and other such values. 485 | """ 486 | @staticmethod 487 | def _cmp(a, b): 488 | return a is b 489 | -------------------------------------------------------------------------------- /parcon/railroad/raildraw.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | This module provides support for drawing railroad diagrams created by 4 | parcon.railroad (which usually creates them from Parcon parsers) into image 5 | files or other locations. 6 | 7 | It's possible to use this module and parcon.railroad as standalone modules to 8 | create syntax diagrams for things unrelated to Parcon. I'll write up 9 | documentation on how to use this module separate from Parcon at some point. 10 | 11 | A simple example of how to use this module is present in the module 12 | documentation of parcon.railroad. 13 | 14 | This module requires Cairo and PyCairo in order to function. These must be 15 | installed separately from Parcon; both Cairo and PyCairo are available for 16 | Windows, Linux, and Mac. A Google search should turn up information on where to 17 | download and install both. 
(If it doesn't, go to pypi.python.org/pypi/parcon 18 | and send an email to the email address you find there.) 19 | 20 | This module also requires Pango and PyGTK (for the Pango bindings). A similar 21 | Google search should turn up information on how to install these. 22 | 23 | If you're on Ubuntu, all of the above dependencies can be installed via 24 | apt-get; I'll get a list of the specific packages to install up here soon. 25 | """ 26 | 27 | from __future__ import division, print_function 28 | from operator import itemgetter 29 | from parcon import railroad as rr 30 | from parcon import options 31 | from math import radians 32 | try: 33 | import cairo 34 | import pango 35 | import pangocairo 36 | except ImportError: 37 | print("ERROR: parcon.railroad.raildraw requires Cairo, PyCairo, Pango," 38 | "and PyGTK (for the Pango bindings). Please install any of those " 39 | "that you may be missing. An ImportError is about to be raised as " 40 | "a result of one of those not being present.") 41 | raise 42 | 43 | 44 | # We have two dicts, one that maps railroad classes (Then, Or, Token, etc.) to 45 | # functions that return (width, height, line_position) and one that maps 46 | # railroad classes to functions that draw them. 
47 | 48 | size_functions = {} 49 | draw_functions = {} 50 | 51 | plain_font = pango.FontDescription("sans 10") 52 | bold_font = pango.FontDescription("sans bold 10") 53 | italic_font = pango.FontDescription("sans italic 10") 54 | bold_italic_font = pango.FontDescription("sans bold italic 10") 55 | title_font = pango.FontDescription("sans bold 14") 56 | default_line_size = 2 57 | 58 | def create_options(map): 59 | return options.Options(map, 60 | raildraw_production_font=plain_font, 61 | raildraw_text_font=bold_font, 62 | raildraw_anycase_font=plain_font, 63 | raildraw_description_font=italic_font, 64 | raildraw_title_font=title_font, 65 | raildraw_title_before=45, 66 | raildraw_title_after=45, 67 | raildraw_title_hide=False, 68 | raildraw_arrow_width=9, 69 | raildraw_arrow_height=7, 70 | raildraw_arrow_indent=0.25, 71 | raildraw_size_of_arrow=size_of_arrow, 72 | raildraw_draw_arrow=draw_arrow, 73 | raildraw_token_padding=1, 74 | raildraw_token_margin=0, 75 | raildraw_token_rect_padding=12, 76 | raildraw_then_before_arrow=8, 77 | raildraw_then_after_arrow=0, 78 | raildraw_line_size=1.6, 79 | raildraw_or_spacing=8, 80 | raildraw_or_radius=7, 81 | raildraw_or_before=0, 82 | raildraw_or_after=4, 83 | raildraw_bullet_radius=2.5, 84 | raildraw_loop_spacing=8, 85 | raildraw_loop_radius=7, 86 | raildraw_loop_before=6, 87 | raildraw_loop_after=6, 88 | raildraw_scale=1.0 89 | ) 90 | 91 | 92 | def f(map, key): 93 | """ 94 | A function decorator that results in the specified function being added 95 | to the specified map under the specified key. 
96 | """ 97 | def decorator(function): 98 | map[key] = function 99 | return function 100 | return decorator 101 | 102 | 103 | def size_of(image, construct, options): 104 | return size_functions[type(construct)](image, construct, options) 105 | 106 | 107 | def draw(image, x, y, construct, options, forward): 108 | return draw_functions[type(construct)](image, x, y, construct, options, forward) 109 | 110 | 111 | def get_font_for_token(options, token): 112 | if token.type == rr.PRODUCTION: 113 | return options.raildraw_production_font 114 | if token.type == rr.TEXT: 115 | return options.raildraw_text_font 116 | if token.type == rr.ANYCASE: 117 | return options.raildraw_anycase_font 118 | if token.type == rr.DESCRIPTION: 119 | return options.raildraw_description_font 120 | raise ValueError 121 | 122 | 123 | def draw_arrow(image, x, y, options, forward): 124 | """ 125 | Draws an arrow at the specified position. 126 | """ 127 | width, height = size_of_arrow(options) 128 | line_pos = height / 2 129 | indent = options.raildraw_arrow_indent * width 130 | if forward: 131 | image.move_to(x, y + line_pos) 132 | image.line_to(x + indent, y + line_pos) 133 | image.stroke() 134 | image.move_to(x, y) 135 | image.line_to(x + width, y + line_pos) 136 | image.line_to(x, y + height) 137 | image.line_to(x + indent, y + line_pos) 138 | else: 139 | image.move_to(x + width, y + line_pos) 140 | image.line_to(x + (width - indent), y + line_pos) 141 | image.stroke() 142 | image.move_to(x + width, y) 143 | image.line_to(x, y + line_pos) 144 | image.line_to(x + width, y + height) 145 | image.line_to(x + (width - indent), y + line_pos) 146 | image.close_path() 147 | image.fill() 148 | 149 | 150 | def draw_line(image, x1, y1, x2, y2): 151 | if x1 == y1 and x2 == y2: # Empty line 152 | return 153 | image.move_to(x1, y1) 154 | image.line_to(x2, y2) 155 | image.stroke() 156 | 157 | 158 | def size_of_arrow(options): 159 | """ 160 | Returns the size of an arrow, in the same format as all of the other 
size 161 | functions, namely (width, height, line_position). 162 | """ 163 | width = options.raildraw_arrow_width 164 | height = options.raildraw_arrow_height 165 | return width, height 166 | 167 | 168 | @f(size_functions, rr.Nothing) 169 | def size_of_Nothing(image, construct, options): 170 | width, height = options.raildraw_size_of_arrow(options) 171 | return width, height, height / 2 172 | 173 | 174 | @f(draw_functions, rr.Nothing) 175 | def draw_Nothing(image, x, y, construct, options, forward): 176 | return options.raildraw_draw_arrow(image, x, y, options, forward) 177 | 178 | 179 | @f(size_functions, rr.Token) 180 | def size_of_Token(image, construct, options): 181 | pango_context = pangocairo.CairoContext(image) 182 | layout = pango_context.create_layout() 183 | layout.set_text(construct.text) 184 | layout.set_font_description(get_font_for_token(options, construct)) 185 | text_width, text_height = layout.get_pixel_size() 186 | h_padding = options.raildraw_token_padding 187 | v_padding = options.raildraw_token_padding 188 | if construct.type not in (rr.TEXT, rr.ANYCASE): 189 | h_padding += options.raildraw_token_rect_padding 190 | margin = options.raildraw_token_margin 191 | height = text_height + (v_padding * 2) + (margin * 2) 192 | width = text_width + (h_padding * 2) + (margin * 2) 193 | if construct.type in (rr.TEXT, rr.ANYCASE): 194 | # TEXT and ANYCASE are drawn with half-circles on either end, so we 195 | # need to account for the size of these circles. What we add here is 196 | # the diameter of these circles, which accounts for a half circle at 197 | # either end. 
198 | width += text_height + (v_padding * 2) 199 | return (width, height, height / 2) 200 | 201 | 202 | @f(draw_functions, rr.Token) 203 | def draw_Token(image, x, y, construct, options, forward): 204 | margin = options.raildraw_token_margin 205 | h_padding = options.raildraw_token_padding 206 | v_padding = options.raildraw_token_padding 207 | if construct.type not in (rr.TEXT, rr.ANYCASE): 208 | h_padding += options.raildraw_token_rect_padding 209 | pango_context = pangocairo.CairoContext(image) 210 | layout = pango_context.create_layout() 211 | layout.set_text(construct.text) 212 | layout.set_font_description(get_font_for_token(options, construct)) 213 | text_width, text_height = layout.get_pixel_size() 214 | if construct.type in (rr.TEXT, rr.ANYCASE): 215 | diameter = v_padding + text_height + v_padding 216 | radius = diameter / 2 217 | image.move_to(x + margin + radius + h_padding, y + margin + v_padding) 218 | pango_context.show_layout(layout) 219 | image.move_to(x + margin + radius, y + margin) 220 | image.line_to(x + margin + radius + h_padding + text_width + h_padding, y + margin) 221 | image.arc(x + margin + radius + h_padding + text_width + h_padding, y + margin + radius, radius, radians(270), radians(90)) 222 | image.line_to(x + margin + radius, y + margin + v_padding + text_height + v_padding) 223 | image.arc(x + margin + radius, y + margin + radius, radius, radians(90), radians(270)) 224 | image.close_path() # Shouldn't have any effect since we're already at 225 | # the start, but just in case 226 | image.stroke() 227 | width = margin + radius + h_padding + text_width + h_padding + radius + margin 228 | else: 229 | image.move_to(x + margin + h_padding, y + margin + v_padding) 230 | pango_context.show_layout(layout) 231 | image.move_to(x + margin, y + margin) 232 | image.line_to(x + margin + h_padding + text_width + h_padding, y + margin) 233 | image.line_to(x + margin + h_padding + text_width + h_padding, y + margin + v_padding + text_height + 
v_padding) 234 | image.line_to(x + margin, y + margin + v_padding + text_height + v_padding) 235 | image.close_path() 236 | image.stroke() 237 | width = margin + h_padding + text_width + h_padding + margin 238 | image.move_to(x, y + margin + v_padding + (text_height / 2)) 239 | image.line_to(x + margin, y + margin + v_padding + (text_height / 2)) 240 | image.stroke() 241 | image.move_to(x + width, y + margin + v_padding + (text_height / 2)) 242 | image.line_to(x + width - margin, y + margin + v_padding + (text_height / 2)) 243 | image.stroke() 244 | 245 | 246 | @f(size_functions, rr.Then) 247 | def size_of_Then(image, construct, options): 248 | constructs = construct.constructs 249 | sizes = [size_of(image, c, options) for c in constructs] 250 | before_heights = [l for w, h, l in sizes] 251 | after_heights = [h - l for w, h, l in sizes] 252 | max_before = max(before_heights) 253 | max_after = max(after_heights) 254 | arrow_line_size = (options.raildraw_then_before_arrow + 255 | options.raildraw_size_of_arrow(options)[0] + 256 | options.raildraw_then_after_arrow) 257 | return sum([w for w, h, l in sizes]) + (len(sizes) - 1) * arrow_line_size, max_before + max_after, max_before 258 | 259 | 260 | @f(draw_functions, rr.Then) 261 | def draw_Then(image, x, y, construct, options, forward): 262 | constructs = construct.constructs 263 | arrow_before = options.raildraw_then_before_arrow 264 | arrow_after = options.raildraw_then_after_arrow 265 | if not forward: 266 | constructs = list(reversed(constructs)) 267 | arrow_before, arrow_after = arrow_after, arrow_before 268 | arrow_width, arrow_height = options.raildraw_size_of_arrow(options) 269 | width, height, line_position = size_of_Then(image, construct, options) 270 | current_x = x 271 | for index, c in enumerate(constructs): 272 | c_width, c_height, c_line_position = size_of(image, c, options) 273 | draw(image, current_x, y + (line_position - c_line_position), c, options, forward) 274 | current_x += c_width 275 | if index 
!= (len(constructs) - 1): 276 | draw_line(image, current_x, y + line_position, current_x + arrow_before, y + line_position) 277 | current_x += arrow_before 278 | options.raildraw_draw_arrow(image, current_x, y + line_position - (arrow_height / 2), options, forward) 279 | current_x += arrow_width 280 | draw_line(image, current_x, y + line_position, current_x + arrow_after, y + line_position) 281 | current_x += arrow_after 282 | 283 | 284 | @f(size_functions, rr.Or) 285 | def size_of_Or(image, construct, options): 286 | constructs = construct.constructs 287 | if len(constructs) == 1: 288 | return size_of(image, constructs[0], options) 289 | sizes = [size_of(image, c, options) for c in constructs] 290 | max_width = max(sizes, key=itemgetter(0))[0] 291 | total_height = sum([h for w, h, l in sizes]) 292 | arrow_width, arrow_height = options.raildraw_size_of_arrow(options) 293 | width = ((options.raildraw_or_radius * 4) + options.raildraw_or_before 294 | + max_width + options.raildraw_or_after + (arrow_width * 2)) 295 | height = total_height + ((len(constructs) - 1) * options.raildraw_or_spacing) 296 | # Line position of Or is the line position of its first construct 297 | return width, height, sizes[0][2] 298 | 299 | 300 | @f(draw_functions, rr.Or) 301 | def draw_Or(image, x, y, construct, options, forward): 302 | if len(construct.constructs) == 1: 303 | return draw(image, x, y, construct.constructs[0], options, forward) 304 | width, height, line_position = size_of_Or(image, construct, options) 305 | constructs = construct.constructs 306 | sizes = [size_of(image, c, options) for c in constructs] 307 | max_width = max(sizes, key=itemgetter(0))[0] 308 | radius = options.raildraw_or_radius 309 | spacing = options.raildraw_or_spacing 310 | arrow_width, arrow_height = options.raildraw_size_of_arrow(options) 311 | before = options.raildraw_or_before 312 | after = options.raildraw_or_after 313 | if not forward: 314 | before, after = after, before 315 | current_y = y 316 | for 
index, (c, (w, h, l)) in enumerate(zip(constructs, sizes)): 317 | draw_arrows = not isinstance(c, rr.Nothing) 318 | # Don't draw arrows if c is a loop and its component is not Nothing; 319 | # the arrows tend to appear superfluous in such a case 320 | if isinstance(c, rr.Loop) and not isinstance(c.component, rr.Nothing): 321 | draw_arrows = False 322 | if index != 0: 323 | image.move_to(x + radius, current_y + l - radius) 324 | image.arc_negative(x + radius * 2, current_y + l - radius, radius, radians(180), radians(90)) 325 | image.stroke() 326 | if not draw_arrows: 327 | draw_line(image, x + radius * 2, current_y + l, x + radius * 2 + arrow_width, current_y + l) 328 | else: 329 | options.raildraw_draw_arrow(image, x + radius * 2, current_y + l - (arrow_height / 2), options, forward) 330 | draw_line(image, x + radius * 2 + arrow_width, current_y + l, x + radius * 2 + arrow_width + before, current_y + l) 331 | construct_x = x + radius * 2 + arrow_width + before 332 | if isinstance(c, rr.Nothing): 333 | draw_line(image, construct_x, current_y + l, construct_x + max_width / 2 - w / 2, current_y + l) 334 | draw(image, construct_x + max_width / 2 - w / 2, current_y, c, options, forward) 335 | draw_line(image, construct_x + max_width / 2 + w / 2, current_y + l, construct_x + max_width + after, current_y + l) 336 | else: 337 | draw(image, construct_x, current_y, c, options, forward) 338 | draw_line(image, construct_x + w, current_y + l, construct_x + max_width + after, current_y + l) 339 | if not draw_arrows: 340 | draw_line(image, construct_x + max_width + after, current_y + l, construct_x + max_width + after + arrow_width, current_y + l) 341 | else: 342 | options.raildraw_draw_arrow(image, construct_x + max_width + after, current_y + l - (arrow_height / 2), options, forward) 343 | if index != 0: 344 | image.move_to(construct_x + max_width + after + arrow_width, current_y + l) 345 | image.arc_negative(construct_x + max_width + after + arrow_width, current_y + l - radius, 
radius, radians(90), radians(0)) 346 | image.stroke() 347 | if index == len(constructs) - 1: # Last construct 348 | line_end_y = current_y + l - radius 349 | current_y += spacing + h 350 | image.move_to(x, y + line_position) 351 | image.arc(x, y + line_position + radius, radius, radians(270), radians(0)) 352 | image.line_to(x + radius, line_end_y) 353 | image.stroke() 354 | draw_line(image, x, y + line_position, x + radius * 2, y + line_position) 355 | end_x = x + radius * 2 + arrow_width + before + max_width + after + arrow_width 356 | draw_line(image, end_x, y + line_position, end_x + radius * 2, y + line_position) 357 | image.move_to(x + width, y + line_position) 358 | image.arc_negative(x + width, y + line_position + radius, radius, radians(270), radians(180)) 359 | image.line_to(x + width - radius, line_end_y) 360 | image.stroke() 361 | 362 | 363 | @f(size_functions, rr.Loop) 364 | def size_of_Loop(image, construct, options): 365 | component = construct.component 366 | delimiter = construct.delimiter 367 | c_width, c_height, c_line_pos = size_of(image, component, options) 368 | d_width, d_height, d_line_pos = size_of(image, delimiter, options) 369 | radius = options.raildraw_loop_radius 370 | before = options.raildraw_loop_before 371 | after = options.raildraw_loop_after 372 | arrow_width, arrow_height = options.raildraw_size_of_arrow(options) 373 | width = radius * 2 + arrow_width + before + max(c_width, d_width) + after + arrow_width + radius * 2 374 | height = c_height + options.raildraw_loop_spacing + d_height 375 | line_pos = c_line_pos 376 | return width, height, line_pos 377 | 378 | 379 | @f(draw_functions, rr.Loop) 380 | def draw_Loop(image, x, y, construct, options, forward): 381 | component = construct.component 382 | delimiter = construct.delimiter 383 | c_width, c_height, c_line_pos = size_of(image, component, options) 384 | d_width, d_height, d_line_pos = size_of(image, delimiter, options) 385 | c_arrow = not isinstance(component, rr.Nothing) 386 
| d_arrow = not isinstance(delimiter, rr.Nothing) 387 | # For now, if the delimiter is Nothing, don't draw the component's arrows 388 | if not d_arrow: 389 | c_arrow = False 390 | spacing = options.raildraw_loop_spacing 391 | radius = options.raildraw_loop_radius 392 | before = options.raildraw_loop_before 393 | after = options.raildraw_loop_after 394 | arrow_width, arrow_height = options.raildraw_size_of_arrow(options) 395 | d_y = y + c_height + spacing 396 | width, height, line_pos = size_of_Loop(image, construct, options) 397 | max_width = max(c_width, d_width) 398 | center_x = x + radius * 2 + arrow_width + before + (max_width / 2) 399 | draw_line(image, x, y + line_pos, x + radius * 2, y + line_pos) 400 | draw_arrow_or_line(image, x + radius * 2, y + line_pos - arrow_height / 2, arrow_width, arrow_height, options, forward, c_arrow) 401 | draw_line(image, x + radius * 2 + arrow_width, y + line_pos, center_x - c_width / 2, y + line_pos) 402 | draw(image, center_x - c_width / 2, y, component, options, forward) 403 | draw_line(image, center_x + c_width / 2, y + line_pos, x + width - radius * 2 - arrow_width, y + line_pos) 404 | draw_arrow_or_line(image, x + width - radius * 2 - arrow_width, y + line_pos - arrow_height / 2, arrow_width, arrow_height, options, forward, c_arrow) 405 | draw_line(image, x + width - radius * 2, y + line_pos, x + width, y + line_pos) 406 | # Component and its two arrows and line drawn. Now draw the curve down and 407 | # the delimiter, and its arrows and lines. 
408 | image.move_to(x + radius * 2, y + line_pos) 409 | image.arc_negative(x + radius * 2, y + line_pos + radius, radius, radians(270), radians(180)) 410 | image.line_to(x + radius, d_y + d_line_pos - radius) 411 | image.arc_negative(x + radius * 2, d_y + d_line_pos - radius, radius, radians(180), radians(90)) 412 | image.stroke() 413 | draw_arrow_or_line(image, x + radius * 2, d_y + d_line_pos - arrow_height / 2, arrow_width, arrow_height, options, not forward, d_arrow) 414 | draw_line(image, x + radius * 2 + arrow_width, d_y + d_line_pos, center_x - d_width / 2, d_y + d_line_pos) 415 | draw(image, center_x - d_width / 2, d_y, delimiter, options, not forward) 416 | draw_line(image, center_x + d_width / 2, d_y + d_line_pos, x + width - radius * 2 - arrow_width, d_y + d_line_pos) 417 | draw_arrow_or_line(image, x + width - radius * 2 - arrow_width, d_y + d_line_pos - arrow_height / 2, arrow_width, arrow_height, options, not forward, d_arrow) 418 | image.move_to(x + width - radius * 2, d_y + d_line_pos) 419 | image.arc_negative(x + width - radius * 2, d_y + d_line_pos - radius, radius, radians(90), radians(0)) 420 | image.line_to(x + width - radius, y + line_pos + radius) 421 | image.arc_negative(x + width - radius * 2, y + line_pos + radius, radius, radians(0), radians(270)) 422 | image.stroke() 423 | 424 | 425 | # FIXME: Test loops with components and delimiters that have different heights 426 | # and different line positions 427 | 428 | 429 | def draw_arrow_or_line(image, x, y, arrow_width, arrow_height, options, forward, arrow): 430 | if arrow: 431 | options.raildraw_draw_arrow(image, x, y, options, forward) 432 | else: 433 | draw_line(image, x, y + arrow_height / 2, x + arrow_width, y + arrow_height / 2) 434 | 435 | 436 | @f(size_functions, rr.Bullet) 437 | def size_of_Bullet(image, construct, options): 438 | diameter = options.raildraw_bullet_radius * 2 439 | return diameter, diameter, diameter / 2 440 | 441 | 442 | @f(draw_functions, rr.Bullet) 443 | def 
draw_Bullet(image, x, y, construct, options, forward): 444 | radius = options.raildraw_bullet_radius 445 | image.move_to(x + radius * 2, y + radius) 446 | image.arc(x + radius, y + radius, radius, radians(0), radians(360)) 447 | image.stroke() 448 | 449 | 450 | del f 451 | 452 | 453 | def draw_text(context, x, y, font, text): 454 | pango_context = pangocairo.CairoContext(context) 455 | layout = pango_context.create_layout() 456 | layout.set_text(text) 457 | layout.set_font_description(font) 458 | context.move_to(x, y) 459 | pango_context.show_layout(layout) 460 | return layout.get_pixel_size() 461 | 462 | 463 | def draw_to_surface(surface_cb, diagram, options, filename, forward=True): 464 | """ 465 | Draws the specified railroad diagram, which should be an instance of 466 | parcon.railroad.Component or one of its subclasses, or a dictionary, into 467 | the PNG file at the specified file name. 468 | 469 | If the specified diagram is a dict, each of the diagrams contained as its 470 | values will be drawn into the file, top to bottom, with the corresponding 471 | keys (which should be strings) used as titles before each diagram. 472 | 473 | You can either manually create instances of any of 474 | parcon.railroad.Component's subclasses to pass to this method, or you can 475 | convert a Parcon parser to a Component by calling its create_railroad 476 | method. 477 | 478 | options is a dictionary of options to use. For now, just use the empty 479 | dict; I'll get around to documenting the options that you can use here at 480 | some point. 
481 | """ 482 | if not isinstance(diagram, dict): 483 | diagram = {"": diagram} 484 | options = create_options(options) 485 | before_title = options.raildraw_title_before 486 | after_title = options.raildraw_title_after 487 | # Create an empty image to give size_of something to reference 488 | empty_image = cairo.ImageSurface(cairo.FORMAT_ARGB32, 1, 1) 489 | empty_context = cairo.Context(empty_image) 490 | width, height = 0, 0 491 | for name, d in diagram.items(): 492 | w, h, l = size_of(empty_context, d, options) 493 | width, height = max(width, w), h + height 494 | height += len(diagram) * (before_title + after_title) 495 | image = surface_cb(int((width + 16) * options.raildraw_scale), 496 | int((height + 16) * options.raildraw_scale)) 497 | context = cairo.Context(image) 498 | if options.raildraw_scale != 1: 499 | context.scale(options.raildraw_scale, options.raildraw_scale) 500 | x = 8 501 | y = 8 502 | for name, d in diagram.items(): 503 | if not options.raildraw_title_hide: 504 | draw_text(context, x, y, options.raildraw_title_font, name + ":") 505 | y += after_title 506 | draw_to_context(context, d, options, filename, forward, x, y) 507 | # FIXME: store the size as computed 10 or 20 lines above to avoid 508 | # having to compute it twice 509 | y += size_of(context, d, options)[1] 510 | y += before_title 511 | 512 | def draw_to_png(diagram, options, filename, forward): 513 | image_ref = [0] 514 | def get_surface_cb(image_ref): 515 | def get_surface(width,height): 516 | image_ref[0] = cairo.ImageSurface(cairo.FORMAT_ARGB32, width, height) 517 | return image_ref[0] 518 | return get_surface 519 | draw_to_surface(get_surface_cb(image_ref), diagram, options, filename, forward) 520 | image_ref[0].write_to_png(filename) 521 | 522 | def draw_to_svg(diagram, options, filename, forward): 523 | def get_surface(width, height): 524 | return cairo.SVGSurface(filename, width, height) 525 | draw_to_surface(get_surface, diagram, options, filename, forward) 526 | 527 | def 
draw_to_image(img_type, diagram, options, filename, forward=True): 528 | if img_type == 'png': 529 | draw_to_png(diagram, options, filename, forward) 530 | elif img_type == 'svg': 531 | draw_to_svg(diagram, options, filename, forward) 532 | else: 533 | raise Exception("No such image type") 534 | 535 | def draw_to_context(context, diagram, options, filename, forward=True, x=8, y=8): 536 | """ 537 | Same as draw_to_png, but draws the specified railroad diagram to a context, 538 | which should be an instance of cairo.Context, instead of to a PNG file. 539 | draw_to_png actually delegates to this function to do the actual drawing. 540 | 541 | x and y are the position at which to draw the specified diagram. 542 | """ 543 | diagram = diagram.copy() 544 | diagram.optimize() 545 | context.set_line_width(options.raildraw_line_size) 546 | draw(context, x, y, diagram, options, forward) 547 | 548 | 549 | 550 | 551 | 552 | 553 | 554 | 555 | 556 | 557 | 558 | 559 | 560 | 561 | 562 | 563 | 564 | 565 | 566 | 567 | 568 | 569 | 570 | 571 | 572 | 573 | 574 | 575 | 576 | 577 | 578 | 579 | 580 | 581 | 582 | 583 | 584 | 585 | 586 | -------------------------------------------------------------------------------- /technotes/web-template.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | 12 | Parcon 13 | 14 | 16 | 17 | 460 | 508 | 511 | 512 | 513 | 514 | 533 | 534 | 535 | 536 | 537 | 538 | 578 |
579 |
580 |
581 |
582 |
583 |
584 |
585 |
586 |
587 |
588 |
589 |
590 |
591 |
592 |
593 |
594 |
595 |
596 |
597 |
598 |
599 |
600 |
601 |
602 |
603 |
604 |
605 |
606 |
607 |
608 |
609 |
610 |
611 |
612 |
613 |
614 |
615 |
616 |
617 |
618 |
619 |
620 |
621 |
622 |
623 |
624 |
625 |
626 |
627 |
628 |
629 |
630 | 643 |
644 |
645 |
646 |
647 |
648 |
649 |
650 |
651 |
652 |
653 |
654 |
655 |
656 |
657 |
658 |
659 |
660 |
661 |
662 |
663 |
664 |
665 |
666 |
667 |
668 |
669 |
670 |
671 |
672 |
673 |
674 |
675 |
676 |
677 |
678 |
679 |
680 |
681 |
682 |
683 |
684 |
685 |
686 |
687 |
688 |
689 |
690 |
691 |
692 |
693 |
694 |
695 |
696 |
697 |
698 |
699 |
700 |
701 |
702 |
703 |
704 |
705 |
706 |
707 |
708 |
709 |
710 |
711 |
712 |
713 |
714 |
715 |
716 |
717 |
718 |
719 |
720 |
721 |
722 | 723 |
724 |
725 |
726 |
727 |
728 | 729 | 730 |
731 | 732 |
733 | 734 |
735 |
736 | 737 |
738 |
739 |
740 |
741 | 742 | 743 | 744 | INSERT_CONTENT_HERE 745 | 746 | 747 | 748 | 752 | 756 |
757 |
758 |
759 |
760 | 761 |
762 | 763 | 764 |
765 |
766 | 767 | 768 |
769 |
770 |
771 | 773 |
774 |
775 |
776 |
777 |
778 |
779 | 781 |
782 |
783 |
784 |
785 | 788 |
789 |
790 |
791 |
792 | 793 |
794 | 795 |
796 |
797 |
798 |
799 |
800 |
801 |
802 |
803 | 846 |
847 | 848 |
849 |
850 |
851 |
852 |
853 |
854 |
855 |
856 | 861 | 864 | 878 | 879 | 880 | --------------------------------------------------------------------------------