├── patches ├── boot_memoizer.patch ├── boot_pos.patch ├── python_memoizer.patch └── python_pos.patch ├── pymetaterp ├── __init__.py ├── boot.py ├── boot_compiled.py ├── boot_grammar.py ├── boot_stackless.py ├── boot_tree.py ├── python.py ├── python_compiled.py ├── python_grammar.py └── util.py ├── readme.md ├── setup.py ├── single_file.py └── test ├── boot_test.py ├── compiled_python_test.py ├── compiled_test.py └── python_parse_test.py /patches/boot_memoizer.patch: -------------------------------------------------------------------------------- 1 | diff --git pymetaterp/boot_stackless.py pymetaterp/boot_stackless.py 2 | index 55ce028..1793b7e 100644 3 | --- pymetaterp/boot_stackless.py 4 | +++ pymetaterp/boot_stackless.py 5 | @@ -57,6 +57,7 @@ class Interpreter: 6 | self.input = [input, pos] 7 | self.stack = [Frame(root, self.input)] 8 | output = self.new_step() 9 | + self.memoizer = {} 10 | while True: 11 | if output is Eval: 12 | root = self.stack[-1].calls[len(self.stack[-1].outputs)] 13 | @@ -83,6 +84,11 @@ class Interpreter: 14 | # print " "*len(self.stack), "matching", name, root[NAME], self.input[1], self.input[0][self.input[1]+1:self.input[1]+11] 15 | if root[NAME] == "anything": 16 | return pop(self.input) 17 | + key = (root[NAME], id(self.input[0]), self.input[1]) 18 | + if key in self.memoizer: 19 | + self.input = self.memoizer[key][1][:] 20 | + return self.memoizer[key][0] 21 | + self.stack[-1].key = key 22 | calls.append(self.rules[root[NAME]][BODY]) 23 | elif name in ["exactly", "token"]: 24 | if name == "token": 25 | @@ -145,9 +151,11 @@ class Interpreter: 26 | make_node = "!" in self.rules[root[NAME]][FLAGS] or\ 27 | (and_node and len(output) > 1) 28 | #print len(self.stack)*" ", "returned", output 29 | - if not make_node: 30 | - return output 31 | - return Node(root[NAME], to_list(output)) 32 | + if make_node: 33 | + output = Node(root[NAME], to_list(output)) 34 | + self.memoizer[frame.key] = (output, self.input[:]) 35 | + return output 36 | + 37 | elif name in "bound": 38 | return Node(root[1][0], to_list(output)) 39 | elif name == "negation": 40 | -------------------------------------------------------------------------------- /patches/boot_pos.patch: -------------------------------------------------------------------------------- 1 | diff --git pymetaterp/boot_stackless.py pymetaterp/boot_stackless.py 2 | index 1793b7e..cb9470c 100644 3 | --- pymetaterp/boot_stackless.py 4 | +++ pymetaterp/boot_stackless.py 5 | @@ -64,6 +64,8 @@ class Interpreter: 6 | self.stack.append(Frame(root, self.input)) 7 | output = self.new_step() 8 | else: 9 | + if type(output) == Node: 10 | + output.pos = (self.stack[-1].input[1]+1, self.input[1]+1) 11 | self.stack.pop() 12 | if not self.stack: 13 | return output 14 | -------------------------------------------------------------------------------- /patches/python_memoizer.patch: -------------------------------------------------------------------------------- 1 | --- python.py 2017-03-13 12:23:33.754710023 +0000 2 | +++ python_memoized.py 2017-03-13 12:29:32.884700105 +0000 3 | @@ -8,6 +8,7 @@ 4 | self.indentation = [0] 5 | self.locals = {} 6 | self.debug = debug 7 | + self.memoizer = {} 8 | return boot.Interpreter.match(self, root, input, pos) 9 | 10 | def eval(self, root): 11 | @@ -51,6 +52,12 @@ 12 | elif root[NAME] == "void": 13 | return 14 | else: 15 | + key = (root[NAME], id(self.input[0]), self.input[1], 16 | + tuple(self.indentation)) 17 | + if key in self.memoizer: 18 | + self.input = self.memoizer[key][1][:] 19 | + return self.memoizer[key][0] 20 | + self.stack[-1].key = key 21 | calls.append(self.rules[root[NAME]][BODY]) 22 | self.stack[-1].locals = self.locals 23 | self.locals = {} 24 | @@ -104,7 +111,9 @@ 25 | elif name == "apply": 26 | # Need to run this line even on error 27 | self.locals = frame.locals 28 | - return boot.Interpreter.next_step(self) 29 | + output = boot.Interpreter.next_step(self) 30 | + self.memoizer[frame.key] = (output, self.input[:]) 31 | + return output 32 | elif name == "lookahead": 33 | self.input = frame.input[:] 34 | return output 35 | -------------------------------------------------------------------------------- /patches/python_pos.patch: -------------------------------------------------------------------------------- 1 | diff --git pymetaterp/python.py pymetaterp/python.py 2 | index 29ac20d..0a9a558 100644 3 | --- pymetaterp/python.py 4 | +++ pymetaterp/python.py 5 | @@ -124,12 +129,14 @@ class Interpreter(boot.Interpreter): 6 | def reformat_atom(atom, trailers): 7 | output = atom 8 | for trailer in to_list(trailers): 9 | + pos = (output.pos[0], trailer.pos[1]) 10 | if trailer.name == "arglist": 11 | - output = Node("__call__", [output, trailer]) 12 | + output = Node("__call__", [output, trailer], pos=pos) 13 | elif trailer.name == "NAME": 14 | - output = Node("__getattr__", [output, Node("NAME", trailer)]) 15 | + output = Node("__getattr__", [output, Node("NAME", trailer, 16 | + pos=trailer.pos)], pos=pos) 17 | elif trailer.name == "subscriptlist": 18 | - output = Node("__getitem__", [output] + trailer) 19 | + output = Node("__getitem__", [output] + trailer, pos=pos) 20 | else: 21 | raise Exception("Unknown trailer %s" % trailer.name) 22 | return output 23 | @@ -154,7 +161,7 @@ def reformat_binary(start, oper_and_atoms): 24 | while index < len(tokens) and\ 25 | priority[tokens[index][0][0]] > priority[op]: 26 | rhs, index = parse(rhs, tokens, index) 27 | - lhs = Node("__binary__", [op, lhs, rhs]) 28 | + lhs = Node("__binary__", [op, lhs, rhs], pos=(lhs.pos[0], rhs.pos[1])) 29 | return (lhs, index) 30 | if not oper_and_atoms: 31 | return start 32 | -------------------------------------------------------------------------------- /pymetaterp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asrp/pymetaterp/ed235fa31dfc2e2d8febe324fe6c4cc17bbaa8fb/pymetaterp/__init__.py -------------------------------------------------------------------------------- /pymetaterp/boot.py: -------------------------------------------------------------------------------- 1 | from util import MatchError, Node 2 | 3 | NAME, FLAGS, ARGS, BODY = [0, 1, 2, 3] 4 | inf = float("inf") 5 | # input is a pair (container, pos) 6 | 7 | def pop(input): 8 | input[1] += 1 9 | try: 10 | return input[0][input[1]] 11 | except IndexError: 12 | raise MatchError("EOF") 13 | 14 | def to_list(output): 15 | return output if getattr(output, "name", None) == "And" else\ 16 | [] if output is None else\ 17 | [output] 18 | 19 | class Interpreter: 20 | def __init__(self, grammar_tree): 21 | self.rules = {rule[NAME][0]:rule for rule in grammar_tree} 22 | self.join_str = True 23 | 24 | def match(self, root, new_input=None, new_pos=-1): 25 | """ >>> g.match(g.rules['grammar'][-1], "x='y'") """ 26 | if new_input is not None: 27 | self.input = [new_input, new_pos] 28 | old_input = self.input[:] 29 | name = root.name 30 | #print("matching %s" % name) 31 | if name in ["and", "args", "body", "output"]: 32 | outputs = [self.match(child) for child in root] 33 | if any(child.name == "output" for child in root): 34 | outputs = [output for child, output in zip(root, outputs) 35 | if child.name == "output"] 36 | elif name == "quantified": 37 | assert(root[1].name == "quantifier") 38 | lower, upper = {"*": (0, inf), "+": (1, inf), "?": (0, 1)}[root[1][0]] 39 | outputs = [] 40 | while len(outputs) < upper: 41 | last_input = self.input[:] 42 | try: 43 | outputs.append(self.match(root[0])) 44 | except MatchError: 45 | self.input = last_input[:] 46 | break 47 | if last_input == self.input: 48 | break 49 | if lower > len(outputs): 50 | raise MatchError("Matched %s < %s times" % (len(outputs), lower)) 51 | elif name == "or": 52 | for child in root: 53 | try: 54 | return self.match(child) 55 | except MatchError: 56 | self.input = old_input[:] 57 | raise MatchError("All Or matches failed") 58 | elif name in ["exactly", "token"]: 59 | if name == "token": 60 | while pop(self.input) in ['\t', '\n', '\r', ' ']: 61 | pass 62 | self.input[1] -= 1 63 | if pop(self.input) == root[0]: 64 | return root[0] 65 | else: 66 | raise MatchError("Not exactly %s" % root) 67 | elif name == "apply": 68 | #print "rule %s" % root[NAME] 69 | if root[NAME] == "anything": 70 | return pop(self.input) 71 | outputs = self.match(self.rules[root[NAME]][BODY]) 72 | if root[NAME] == "escaped_char": 73 | chars = dict(["''", '""', "t\t", "n\n", "r\r", 74 | "b\b", "f\f", "\\\\"]) 75 | return chars[outputs] 76 | and_node = getattr(outputs, "name", None) == "And" 77 | make_node = "!" in self.rules[root[NAME]][FLAGS] or\ 78 | (and_node and len(outputs) > 1) 79 | if not make_node: 80 | return outputs 81 | return Node(root[NAME], to_list(outputs)) 82 | elif name in "bound": 83 | return Node(root[1][0], to_list(self.match(root[0]))) 84 | elif name == "negation": 85 | try: 86 | self.match(root[0]) 87 | except MatchError: 88 | self.input = old_input 89 | return None 90 | raise MatchError("Negation true") 91 | else: 92 | raise Exception("Unknown operator %s" % name) 93 | 94 | outputs = [elem for output in outputs 95 | for elem in to_list(output)] 96 | if len(outputs) == 1: 97 | return outputs[0] 98 | elif len(outputs) == 0: 99 | return None 100 | else: 101 | if self.join_str and all(type(output) == str for output in outputs): 102 | return "".join(outputs) 103 | return Node("And", outputs) 104 | -------------------------------------------------------------------------------- /pymetaterp/boot_compiled.py: -------------------------------------------------------------------------------- 1 | from util import MatchError 2 | from pdb import set_trace as bp 3 | 4 | inf = float("inf") 5 | 6 | class Glob(object): 7 | pass 8 | 9 | g = Glob() 10 | 11 | class Source(): 12 | def __init__(self, source): 13 | self.source = source 14 | self.position = -1 15 | def next(self): 16 | self.position += 1 17 | try: 18 | return self.source[self.position] 19 | except IndexError: 20 | return MatchError("EOF") 21 | 22 | class Node(object): 23 | def __init__(self, name, children, pos=(None, None)): 24 | self.name = name 25 | self.children = children 26 | self.pos = pos 27 | def __getitem__(self, index): 28 | if type(self.children) == list: 29 | return self.children[index] 30 | else: 31 | return [self.children][index] 32 | def __len__(self): 33 | return len(self.children) if isinstance(self.children, list) else 1 if self.children else 0 34 | def __repr__(self): 35 | return "%s(%s)" % (self.name, self.children) 36 | def pprint(self, indent=0): 37 | print " "*indent + self.name 38 | children = [self.children] if not isinstance(self.children, list) else self.children 39 | for child in children: 40 | if not hasattr(child, "pprint"): 41 | print " "*(indent + 1), type(child).__name__, repr(child) 42 | else: 43 | child.pprint(indent + 2) 44 | 45 | def to_list(value): 46 | return value if isinstance(value, list) else\ 47 | [] if value is None else\ 48 | [value] 49 | 50 | def exactly(char): 51 | ichar = g.input.next() 52 | return ichar if isinstance(ichar, MatchError) or char == ichar\ 53 | else MatchError("Not exactly %s" % char) 54 | 55 | def between(start, end): 56 | ichar = g.input.next() 57 | return ichar if isinstance(ichar, MatchError) or start <= ichar <= end\ 58 | else MatchError("Not between %s and %s" % (start, end)) 59 | 60 | def token(s): 61 | while g.input.next() in ['\t', '\n', '\r', ' ']: 62 | pass 63 | g.input.position -= 1 64 | for char in s: 65 | if g.input.next() != char: 66 | return MatchError("Not exactly %s" % char) 67 | return s 68 | 69 | def or_(children): 70 | saved = g.input.position 71 | for child in children: 72 | g.input.position = saved 73 | output = child() 74 | if not isinstance(output, MatchError): 75 | return output 76 | g.input.position = saved 77 | return MatchError("No OR child matches") 78 | 79 | def and_(children): 80 | saved = g.input.position 81 | outputs = [] 82 | output_mode = False 83 | for child in children: 84 | output = child() 85 | if isinstance(output, MatchError): 86 | g.input.position = saved 87 | return MatchError("And match failed") 88 | if output_mode: 89 | if getattr(output, "name", None) == "out": 90 | outputs.extend(to_list(output.children)) 91 | else: 92 | if getattr(output, "name", None) == "out": 93 | outputs = output.children 94 | output_mode = True 95 | else: 96 | outputs.extend(to_list(output)) 97 | return "".join(outputs) if outputs and type(outputs) == list and all(type(output) == str for output in outputs) and len(outputs[0]) == 1\ 98 | else outputs 99 | 100 | def out(child=lambda: None): 101 | output = child() 102 | return output if isinstance(output, MatchError) else Node("out", output) 103 | 104 | def quantified(child, (_, quantifier)): 105 | lower, upper = {"*": (0, inf), "+": (1, inf), "?": (0, 1)}[quantifier] 106 | outputs = [] 107 | count = 0 108 | start_saved = g.input.position 109 | while count < upper: 110 | saved = g.input.position 111 | output = child() 112 | if isinstance(output, MatchError): 113 | if count < lower: 114 | g.input.position = start_saved 115 | return MatchError("Quantified undermatch %s < %s" % (count, lower)) 116 | else: 117 | g.input.position = saved 118 | return outputs 119 | outputs.extend(to_list(output)) 120 | count += 1 121 | return outputs 122 | 123 | def negation(child): 124 | saved = g.input.position 125 | output = child() 126 | g.input.position = saved 127 | return None if isinstance(output, MatchError) else MatchError("Negation_is_true") 128 | 129 | def bound(child, (_, name)): 130 | saved = g.input.position 131 | output = child() 132 | return output if isinstance(output, MatchError) else\ 133 | Node(name, output, (saved+1, g.input.position+1)) 134 | 135 | def apply_(name): 136 | saved = g.input.position 137 | # func, flagged 138 | output = g.rules[name][0]() 139 | if isinstance(output, MatchError): 140 | return output 141 | if name == "escaped_char": 142 | return({"t": "\t", "n": "\n", "\\\\": "\\", "r": "\r"}.get(output, output)) 143 | if name == "balanced": 144 | return output 145 | if "!" in g.rules[name][1] or (isinstance(output, list) and len(output) > 1): 146 | return Node(name, output, (saved+1, g.input.position+1)) 147 | return output 148 | 149 | def rule_anything(): 150 | char = g.input.next() 151 | return MatchError("End_of_file") if char is None else char 152 | 153 | def rule_letter(): 154 | return(or_([lambda: between("a", "z"), lambda: between("A", "Z")])) 155 | 156 | def rule_digit(): 157 | return(between("0", "9")) 158 | 159 | def closure(child, value): 160 | return str(value) if isinstance(child, str) or child.name in ["quantifier", "inline", "bind"] else "lambda: %s" % value 161 | 162 | def to_python(root): 163 | if isinstance(root, str): 164 | return repr(root) 165 | elif type(root) == list: 166 | #names = [rule[0][0] for rule in root] + ["letter", "digit", "anything"] 167 | named = ", ".join(['"%s": (rule_%s, %s)' % (rule[0][0], rule[0][0], repr("".join(rule[1]))) 168 | for rule in root]) 169 | return "\n\n".join(to_python(child) for child in root 170 | if child[0][0] not in ["letter", "digit"]) +\ 171 | "\n\ng.rules.update({%s})" % named 172 | name = root.name + "_" if root.name in ["and", "or", "apply"] else root.name 173 | if name in ["quantifier", "inline", "bind"]: 174 | return (name, to_python(root[0])[1:-1]) 175 | elif name == "rule": 176 | return "def rule_%s():\n return %s" % (root[0][0], to_python(root[-1])) 177 | else: 178 | children = ", ".join(closure(child, to_python(child)) 179 | for child in root) 180 | if name in ["and_", "or_"]: 181 | children = "[%s]" % children 182 | if name == "output": 183 | name = "out" 184 | return "%s(%s)" % (name, children) 185 | 186 | def gen_from_tree(): 187 | import boot_tree 188 | from util import simple_wrap_tree 189 | return to_python(list(simple_wrap_tree(boot_tree.tree))) 190 | 191 | def match(tree, inp): 192 | g.rules = {'anything': (rule_anything, ''), 'letter': (rule_letter, ''), 193 | 'digit': (rule_digit, '')} 194 | exec to_python(tree) 195 | g.input = Source(inp) 196 | return rule_grammar() 197 | 198 | if __name__ == "__main__": 199 | from boot_grammar import bootstrap 200 | exec gen_from_tree() 201 | #g.input = Source("foo = bar") 202 | g.input = Source(bootstrap) 203 | output = rule_grammar() 204 | print to_python(output) 205 | exec to_python(output) 206 | g.input = Source(bootstrap) 207 | output2 = rule_grammar() 208 | assert(to_python(output) == to_python(output2)) 209 | -------------------------------------------------------------------------------- /pymetaterp/boot_grammar.py: -------------------------------------------------------------------------------- 1 | bootstrap = r""" 2 | name = (letter | '_') (letter | digit | '_')* 3 | expr = apply | exactly | token | parenthesis | output 4 | 5 | exactly! = "'" {(escaped_char | ~'\'' anything)*} "'" 6 | token! = "\"" {(escaped_char | ~'"' anything)*} "\"" 7 | escaped_char! = '\\' {'n'|'r'|'t'|'b'|'f'|'"'|'\''|'\\'} 8 | apply! = ('\t'|' ')* {name} 9 | parenthesis = "(" {or} ")" 10 | output! = "{" {or} "}" 11 | 12 | not = "~" {expr=negation} | expr 13 | quantified = not (('*' | '+' | '?')=quantifier)? 14 | bound = quantified ('=' {name=inline})? 15 | and = bound* 16 | or = and ("|" {and})* 17 | 18 | rule = spaces {name=rule_name '!'?=flags and=args ("=" {or})} 19 | grammar = {rule*} spaces 20 | """ 21 | 22 | diff = r""" 23 | comment = '#' (~'\n' anything)* 24 | hspace = ' ' | '\t' | comment 25 | indentation = (hspace* ('\r' '\n' | '\r' | '\n'))* hspace+ 26 | space = '\n' | '\r' | hspace 27 | 28 | expr = apply | exactly | token | parenthesis | output | list 29 | | rule_value | predicate | action 30 | 31 | list! = "[" {or} "]" 32 | predicate! = "?(" {balanced} ')' 33 | action! = "!(" {balanced} ')' 34 | rule_value! = "->" hspace* {(escaped_char | ~'\n' anything)*} 35 | apply! = indentation? {name ('(' {balanced=args} ')')?} 36 | not = "~" "~" {expr=lookahead} | "~" {expr=negation} | expr 37 | bound = ":" {name=bind} 38 | | quantified (':' {name=bind} | '=' {name=inline})? 39 | 40 | balanced = (escaped_char | '(' balanced ')' | ~')' anything)* 41 | """ 42 | 43 | extra = """ 44 | letter = 'a'|'b'|'c'|'d'|'e'|'f'|'g'|'h'|'i'|'j'|'k'|'l'|'m'|'n'|'o'|'p'|'q'|'r'|'s'|'t'|'u'|'v'|'w'|'x'|'y'|'z'|'A'|'B'|'C'|'D'|'E'|'F'|'G'|'H'|'I'|'J'|'K'|'L'|'M'|'N'|'O'|'P'|'Q'|'R'|'S'|'T'|'U'|'V'|'W'|'X'|'Y'|'Z' 45 | digit = '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9' 46 | space = '\t'|'\n'|'\r'|' ' 47 | spaces = space* 48 | """ 49 | -------------------------------------------------------------------------------- /pymetaterp/boot_stackless.py: -------------------------------------------------------------------------------- 1 | from util import MatchError, Node 2 | 3 | NAME, FLAGS, ARGS, BODY = [0, 1, 2, 3] 4 | inf = float("inf") 5 | # input is a pair (container, pos) 6 | 7 | class Eval: 8 | pass 9 | 10 | class Frame: 11 | def __init__(self, root, input): 12 | self.root = root 13 | self.calls = [] 14 | self.input = input[:] 15 | self.outputs = [] 16 | 17 | def __repr__(self): 18 | return repr(self.calls) 19 | 20 | def pop(input): 21 | input[1] += 1 22 | try: 23 | return input[0][input[1]] 24 | except IndexError: 25 | return MatchError("EOF") 26 | 27 | def to_list(output): 28 | return output if getattr(output, "name", None) == "And" else\ 29 | [] if output is None else\ 30 | [output] 31 | 32 | def to_node(outputs, join_str): 33 | outputs = [elem for output in outputs 34 | for elem in to_list(output)] 35 | if len(outputs) == 1: 36 | return outputs[0] 37 | elif len(outputs) == 0: 38 | return None 39 | else: 40 | if join_str and all(type(output) == str for output in outputs): 41 | return "".join(outputs) 42 | return Node("And", outputs) 43 | 44 | class Interpreter: 45 | def __init__(self, grammar_tree): 46 | self.rules = {rule[NAME][0]:rule for rule in grammar_tree} 47 | self.join_str = True 48 | 49 | def dbg(self): 50 | print len(self.input[0]), self.input[1] 51 | if len(self.input[0]) == self.input[1] + 1: 52 | return 53 | print self.input[0][self.input[1]: self.input[1] + 200] 54 | 55 | def parse(self, rule_name, input, **kwargs): 56 | output = self.match(self.rules[rule_name][-1], input, **kwargs) 57 | if type(output) == MatchError or len(self.input[0]) == self.input[1] + 1: 58 | return output 59 | return MatchError("Not all input read") 60 | 61 | def match(self, root, input=None, pos=-1): 62 | """ >>> g.match(g.rules['grammar'][-1], "x='y'") """ 63 | self.input = [input, pos] 64 | self.stack = [Frame(root, self.input)] 65 | output = self.new_step() 66 | self.memoizer = {} 67 | while True: 68 | if output is Eval: 69 | root = self.stack[-1].calls[len(self.stack[-1].outputs)] 70 | self.stack.append(Frame(root, self.input)) 71 | output = self.new_step() 72 | else: 73 | if type(output) == Node: 74 | output.pos = (self.stack[-1].input[1]+1, self.input[1]+1) 75 | self.stack.pop() 76 | if not self.stack: 77 | return output 78 | #print len(self.stack)*" ", "returned", output 79 | self.stack[-1].outputs.append(output) 80 | output = self.next_step() 81 | 82 | def new_step(self): 83 | root = self.stack[-1].root 84 | name = root.name 85 | calls = self.stack[-1].calls 86 | #print len(self.stack)*" ", "matching", name 87 | if name in ["and", "args", "output", "or"]: 88 | calls.extend(root) 89 | elif name in ["bound", "negation", "quantified"]: 90 | calls.append(root[0]) 91 | elif name == "apply": 92 | # print " "*len(self.stack), "matching", name, root[NAME], self.input[1], self.input[0][self.input[1]+1:self.input[1]+11] 93 | if root[NAME] == "anything": 94 | return pop(self.input) 95 | key = (root[NAME], id(self.input[0]), self.input[1]) 96 | if key in self.memoizer: 97 | self.input = self.memoizer[key][1][:] 98 | return self.memoizer[key][0] 99 | self.stack[-1].key = key 100 | calls.append(self.rules[root[NAME]][BODY]) 101 | elif name in ["exactly", "token"]: 102 | if name == "token": 103 | while pop(self.input) in ['\t', '\n', '\r', ' ']: 104 | pass 105 | if self.input[1] == len(self.input[0]): 106 | return MatchError("EOF") 107 | self.input[1] -= 1 108 | for char in root[0]: 109 | if pop(self.input) != char: 110 | return MatchError("Not exactly %s" % root[0]) 111 | return root[0] 112 | return Eval 113 | 114 | def next_step(self): 115 | frame = self.stack[-1] 116 | root = frame.root 117 | name = root.name 118 | outputs = frame.outputs 119 | output = outputs[-1] if outputs else None 120 | is_error = type(output) == MatchError 121 | finished = len(outputs) == len(frame.calls) 122 | if is_error and name not in ["quantified", "or", "negation"]: 123 | return output 124 | elif not (finished or name in ["or", "quantified"]): 125 | return Eval 126 | if name in ["and", "args", "output"]: 127 | if any(child.name == "output" for child in root): 128 | outputs = [output for child, output in zip(root, outputs) 129 | if child.name == "output"] 130 | return to_node(outputs, self.join_str) 131 | elif name == "quantified": 132 | assert(root[1].name == "quantifier") 133 | lower, upper = {"*": (0, inf), "+": (1, inf), "?": (0, 1)}[root[1][0]] 134 | if is_error: 135 | self.input = frame.input[:] 136 | outputs.pop() 137 | #print("output len", len(outputs)) 138 | if is_error or len(outputs) == upper or frame.input == self.input: 139 | if lower > len(outputs): 140 | return MatchError("Matched %s < %s times" % (len(outputs), lower)) 141 | else: 142 | return to_node(outputs, self.join_str) 143 | else: 144 | frame.input = self.input[:] 145 | self.stack[-1].calls.append(root[0]) 146 | elif name == "or": 147 | if is_error: 148 | self.input = frame.input[:] 149 | if finished: 150 | return MatchError("All Or matches failed") 151 | else: 152 | return output 153 | elif name == "apply": 154 | if root[NAME] == "escaped_char" and not is_error: 155 | chars = dict(["''", '""', "t\t", "n\n", "r\r", 156 | "b\b", "f\f", "\\\\"]) 157 | return chars[output] 158 | and_node = getattr(output, "name", None) == "And" 159 | make_node = "!" in self.rules[root[NAME]][FLAGS] or\ 160 | (and_node and len(output) > 1) 161 | #print len(self.stack)*" ", "returned", output 162 | if make_node: 163 | output = Node(root[NAME], to_list(output)) 164 | self.memoizer[frame.key] = (output, self.input[:]) 165 | return output 166 | 167 | elif name in "bound": 168 | return Node(root[1][0], to_list(output)) 169 | elif name == "negation": 170 | if is_error: 171 | self.input = frame.input 172 | return None 173 | else: 174 | return MatchError("Negation true") 175 | else: 176 | raise Exception("Unknown operator %s" % name) 177 | return Eval 178 | -------------------------------------------------------------------------------- /pymetaterp/boot_tree.py: -------------------------------------------------------------------------------- 1 | tree = ['And', 2 | ['rule', 3 | ['rule_name', 'name'], 4 | ['flags'], 5 | ['args'], 6 | ['and', 7 | ['or', ['apply', 'letter'], ['exactly', '_']], 8 | ['quantified', 9 | ['or', ['apply', 'letter'], ['apply', 'digit'], ['exactly', '_']], 10 | ['quantifier', '*']]]], 11 | ['rule', 12 | ['rule_name', 'expr'], 13 | ['flags'], 14 | ['args'], 15 | ['or', 16 | ['apply', 'apply'], 17 | ['apply', 'exactly'], 18 | ['apply', 'token'], 19 | ['apply', 'parenthesis'], 20 | ['apply', 'output']]], 21 | ['rule', 22 | ['rule_name', 'exactly'], 23 | ['flags', '!'], 24 | ['args'], 25 | ['and', 26 | ['token', "'"], 27 | ['output', 28 | ['quantified', 29 | ['or', 30 | ['apply', 'escaped_char'], 31 | ['and', ['negation', ['exactly', "'"]], ['apply', 'anything']]], 32 | ['quantifier', '*']]], 33 | ['token', "'"]]], 34 | ['rule', 35 | ['rule_name', 'token'], 36 | ['flags', '!'], 37 | ['args'], 38 | ['and', 39 | ['token', '"'], 40 | ['output', 41 | ['quantified', 42 | ['or', 43 | ['apply', 'escaped_char'], 44 | ['and', ['negation', ['exactly', '"']], ['apply', 'anything']]], 45 | ['quantifier', '*']]], 46 | ['token', '"']]], 47 | ['rule', 48 | ['rule_name', 'escaped_char'], 49 | ['flags', '!'], 50 | ['args'], 51 | ['and', 52 | ['exactly', '\\'], 53 | ['output', 54 | ['or', 55 | ['exactly', 'n'], 56 | ['exactly', 'r'], 57 | ['exactly', 't'], 58 | ['exactly', 'b'], 59 | ['exactly', 'f'], 60 | ['exactly', '"'], 61 | ['exactly', "'"], 62 | ['exactly', '\\']]]]], 63 | ['rule', 64 | ['rule_name', 'apply'], 65 | ['flags', '!'], 66 | ['args'], 67 | ['and', 68 | ['quantified', 69 | ['or', ['exactly', '\t'], ['exactly', ' ']], 70 | ['quantifier', '*']], 71 | ['output', ['apply', 'name']]]], 72 | ['rule', 73 | ['rule_name', 'parenthesis'], 74 | ['flags'], 75 | ['args'], 76 | ['and', ['token', '('], ['output', ['apply', 'or']], ['token', ')']]], 77 | ['rule', 78 | ['rule_name', 'output'], 79 | ['flags', '!'], 80 | ['args'], 81 | ['and', ['token', '{'], ['output', ['apply', 'or']], ['token', '}']]], 82 | ['rule', 83 | ['rule_name', 'not'], 84 | ['flags'], 85 | ['args'], 86 | ['or', 87 | ['and', 88 | ['token', '~'], 89 | ['output', ['bound', ['apply', 'expr'], ['inline', 'negation']]]], 90 | ['apply', 'expr']]], 91 | ['rule', 92 | ['rule_name', 'quantified'], 93 | ['flags'], 94 | ['args'], 95 | ['and', 96 | ['apply', 'not'], 97 | ['quantified', 98 | ['bound', 99 | ['or', ['exactly', '*'], ['exactly', '+'], ['exactly', '?']], 100 | ['inline', 'quantifier']], 101 | ['quantifier', '?']]]], 102 | ['rule', 103 | ['rule_name', 'bound'], 104 | ['flags'], 105 | ['args'], 106 | ['and', 107 | ['apply', 'quantified'], 108 | ['quantified', 109 | ['and', 110 | ['exactly', '='], 111 | ['output', ['bound', ['apply', 'name'], ['inline', 'inline']]]], 112 | ['quantifier', '?']]]], 113 | ['rule', 114 | ['rule_name', 'and'], 115 | ['flags'], 116 | ['args'], 117 | ['quantified', ['apply', 'bound'], ['quantifier', '*']]], 118 | ['rule', 119 | ['rule_name', 'or'], 120 | ['flags'], 121 | ['args'], 122 | ['and', 123 | ['apply', 'and'], 124 | ['quantified', 125 | ['and', ['token', '|'], ['output', ['apply', 'and']]], 126 | ['quantifier', '*']]]], 127 | ['rule', 128 | ['rule_name', 'rule'], 129 | ['flags'], 130 | ['args'], 131 | ['and', 132 | ['apply', 'spaces'], 133 | ['output', 134 | ['and', 135 | ['bound', ['apply', 'name'], ['inline', 'rule_name']], 136 | ['bound', 137 | ['quantified', ['exactly', '!'], ['quantifier', '?']], 138 | ['inline', 'flags']], 139 | ['bound', ['apply', 'and'], ['inline', 'args']], 140 | ['and', ['token', '='], ['output', ['apply', 'or']]]]]]], 141 | ['rule', 142 | ['rule_name', 'grammar'], 143 | ['flags'], 144 | ['args'], 145 | ['and', 146 | ['output', ['quantified', ['apply', 'rule'], ['quantifier', '*']]], 147 | ['apply', 'spaces']]], 148 | ['rule', 149 | ['rule_name', 'letter'], 150 | ['flags'], 151 | ['args'], 152 | ['or', 153 | ['exactly', 'a'], 154 | ['exactly', 'b'], 155 | ['exactly', 'c'], 156 | ['exactly', 'd'], 157 | ['exactly', 'e'], 158 | ['exactly', 'f'], 159 | ['exactly', 'g'], 160 | ['exactly', 'h'], 161 | ['exactly', 'i'], 162 | ['exactly', 'j'], 163 | ['exactly', 'k'], 164 | ['exactly', 'l'], 165 | ['exactly', 'm'], 166 | ['exactly', 'n'], 167 | ['exactly', 'o'], 168 | ['exactly', 'p'], 169 | ['exactly', 'q'], 170 | ['exactly', 'r'], 171 | ['exactly', 's'], 172 | ['exactly', 't'], 173 | ['exactly', 'u'], 174 | ['exactly', 'v'], 175 | ['exactly', 'w'], 176 | ['exactly', 'x'], 177 | ['exactly', 'y'], 178 | ['exactly', 'z'], 179 | ['exactly', 'A'], 180 | ['exactly', 'B'], 181 | ['exactly', 'C'], 182 | ['exactly', 'D'], 183 | ['exactly', 'E'], 184 | ['exactly', 'F'], 185 | ['exactly', 'G'], 186 | ['exactly', 'H'], 187 | ['exactly', 'I'], 188 | ['exactly', 'J'], 189 | ['exactly', 'K'], 190 | ['exactly', 'L'], 191 | ['exactly', 'M'], 192 | ['exactly', 'N'], 193 | ['exactly', 'O'], 194 | ['exactly', 'P'], 195 | ['exactly', 'Q'], 196 | ['exactly', 'R'], 197 | ['exactly', 'S'], 198 | ['exactly', 'T'], 199 | ['exactly', 'U'], 200 | ['exactly', 'V'], 201 | ['exactly', 'W'], 202 | ['exactly', 'X'], 203 | ['exactly', 'Y'], 204 | ['exactly', 'Z']]], 205 | ['rule', 206 | ['rule_name', 'digit'], 207 | ['flags'], 208 | ['args'], 209 | ['or', 210 | ['exactly', '0'], 211 | ['exactly', '1'], 212 | ['exactly', '2'], 213 | ['exactly', '3'], 214 | ['exactly', '4'], 215 | ['exactly', '5'], 216 | ['exactly', '6'], 217 | ['exactly', '7'], 218 | ['exactly', '8'], 219 | ['exactly', '9']]], 220 | ['rule', 221 | ['rule_name', 'space'], 222 | ['flags'], 223 | ['args'], 224 | ['or', 225 | ['exactly', '\t'], 226 | ['exactly', '\n'], 227 | ['exactly', '\r'], 228 | ['exactly', ' ']]], 229 | ['rule', 230 | ['rule_name', 'spaces'], 231 | ['flags'], 232 | ['args'], 233 | ['quantified', ['apply', 'space'], ['quantifier', '*']]]] 234 | -------------------------------------------------------------------------------- /pymetaterp/python.py: -------------------------------------------------------------------------------- 1 | import boot_stackless as boot 2 | reload(boot) 3 | from boot_stackless import * 4 | from pdb import set_trace as bp 5 | from bisect import bisect_left as bisect 6 | 7 | class Interpreter(boot.Interpreter): 8 | def match(self, root, input=None, pos=-1, locals=None, debug=False): 9 | self.indentation = [0] 10 | self.default_locals = self.locals = {} if locals is None else dict(locals) 11 | self.debug = debug 12 | self.memoizer = {} 13 | return boot.Interpreter.match(self, root, input, pos) 14 | 15 | def eval(self, root): 16 | self.locals['self'] = self 17 | output = eval(root, globals(), self.locals) 18 | del self.locals['self'] 19 | return output 20 | 21 | def new_step(self): 22 | root = self.stack[-1].root 23 | name = root.name 24 | calls = self.stack[-1].calls 25 | if name in ["and", "args", "output", "or"]: 26 | if len(root) == 0 and name in ["and", "args", "output"]: 27 | return 28 | calls.extend(root) 29 | elif name in ["lookahead"]: 30 | calls.append(root[0]) 31 | elif name == "exactly": 32 | if pop(self.input) != root[0]: 33 | return MatchError("Not exactly %s" % root[0]) 34 | return root[0] 35 | elif name == "token": 36 | while pop(self.input) in ['\t', ' ', '\\']: 37 | if self.input[0][self.input[1]] == '\\': 38 | pop(self.input) 39 | if self.input[1] == len(self.input[0]): 40 | return MatchError("EOF") 41 | self.input[1] -= 1 42 | for char in root[0]: 43 | if pop(self.input) != char: 44 | return MatchError("Not exactly %s" % root[0]) 45 | if root[0].isalpha(): 46 | top = pop(self.input) 47 | if top.isalnum() or top == '_': 48 | return MatchError("Prefix matched but didn't end.") 49 | self.input[1] -= 1 50 | return root[0] 51 | elif name == "apply": 52 | if self.debug: 53 | print " "*len(self.stack), "applying", root[NAME], self.input[1], str(self.input[0][self.input[1]+1:self.input[1]+11])[:20] 54 | if root[NAME] == "anything": 55 | return pop(self.input) 56 | elif root[NAME] == "void": 57 | return 58 | else: 59 | key = (root[NAME], id(self.input[0]), self.input[1], 60 | tuple(self.indentation)) 61 | if key in self.memoizer: 62 | self.input = self.memoizer[key][1][:] 63 | return self.memoizer[key][0] 64 | self.stack[-1].key = key 65 | calls.append(self.rules[root[NAME]][BODY]) 66 | self.stack[-1].locals = self.locals 67 | self.locals = dict(self.default_locals) 68 | elif name == "rule_value": 69 | return self.eval(root[0]) 70 | elif name == "predicate": 71 | output = self.eval(root[0]) 72 | if not output: 73 | return MatchError("Predicate evaluates to false") 74 | elif output == True: 75 | return None 76 | else: 77 | return Node("predicate", [output]) 78 | elif name == "action": 79 | self.locals['self'] = self 80 | exec(root[0], globals(), self.locals) 81 | del self.locals['self'] 82 | return 83 | else: 84 | return boot.Interpreter.new_step(self) 85 | return Eval 86 | 87 | def next_step(self): 88 | frame = self.stack[-1] 89 | root = frame.root 90 | name = root.name 91 | outputs = frame.outputs 92 | output = outputs[-1] if outputs else None 93 | is_error = type(output) == MatchError 94 | finished = len(outputs) == len(frame.calls) 95 | if self.debug and name in ["apply"]: 96 | print " "*len(self.stack), name, "->", output 97 | if is_error and name not in ["quantified", "or", "negation", "apply"]: 98 | return output 99 | elif not (finished or name in ["or", "quantified"]): 100 | return Eval 101 | if name in ["and", "args", "output"]: 102 | assert(len(outputs) == len(root)) 103 | if any(child.name == "output" for child in root): 104 | outputs = [output for child, output in zip(root, outputs) 105 | if child.name == "output"] 106 | elif any(child.name == "rule_value" for child in root): 107 | outputs = [output for child, output in zip(root, outputs) 108 | if child.name == "rule_value"] 109 | assert(len(outputs) == 1) 110 | return to_node(outputs, self.join_str) 111 | elif name in "bound": 112 | if root[1].name == "inline": 113 | return Node(root[1][0], to_list(output)) 114 | else: # bind 115 | self.locals[root[1][0]] = output 116 | return 117 | elif name == "apply": 118 | # Need to run this line even on error 119 | self.locals = frame.locals 120 | output = boot.Interpreter.next_step(self) 121 | self.memoizer[frame.key] = (output, self.input[:]) 122 | return output 123 | elif name == "lookahead": 124 | self.input = frame.input[:] 125 | return output 126 | else: 127 | return boot.Interpreter.next_step(self) 128 | return Eval 129 | 130 | def st(self): 131 | source = self.source 132 | stack = self.stack 133 | filename = getattr(self, "filename", "") 134 | source_lines = [line+"\n" for line in source.split("\n")] 135 | line_len = [len(l) for l in source_lines] 136 | source_line_num = [sum(line_len[:i+1]) for i in xrange(len(line_len))] 137 | func_name = "None" 138 | for i, frame in enumerate(stack): 139 | line_num = [bisect(source_line_num, p) for p in frame.root.pos] 140 | rel_pos = [p - (source_line_num[line_num[0]-1] if line_num[0] else 0) 141 | for p in frame.root.pos] 142 | lines = "".join(source_lines[line_num[0]: line_num[1]+1]) 143 | print str(i).ljust(2) + " In file " + '\033[92m' + filename + '\033[0m' + " line " + str(line_num[0]) + " function " + '\033[92m' + str(func_name) + " (" + frame.root.name + ")" + '\033[0m' 144 | print lines[:rel_pos[0]] + '\033[91m' + lines[rel_pos[0]: rel_pos[1]] + '\033[0m' + lines[rel_pos[1]:-1] 145 | if frame.root.name == "apply": 146 | func_name = frame.root[0] 147 | 148 | def reformat_atom(atom, trailers): 149 | output = atom 150 | for trailer in to_list(trailers): 151 | pos = (output.pos[0], trailer.pos[1]) 152 | if trailer.name == "arglist": 153 | output = Node("__call__", [output, trailer], pos=pos) 154 | elif trailer.name == "NAME": 155 | output = Node("__getattr__", [output, Node("NAME", trailer, 156 | pos=trailer.pos)], pos=pos) 157 | elif trailer.name == "subscriptlist": 158 | output = Node("__getitem__", [output] + trailer, pos=pos) 159 | else: 160 | raise Exception("Unknown trailer %s" % trailer.name) 161 | return output 162 | 163 | binary_ops = ((">=", "<=", "<>", "<", ">", "==", "!=", 164 | "in", "not in", "is not", "is"), 165 | ("|",), ("^",), ("&",), ("<<", ">>"), ("+", "-"), 166 | ("*", "/", "%", "//"), ("**",)) 167 | priority = {op:i for i, ops in enumerate(binary_ops) for op in ops} 168 | expr_ops = binary_ops[1:] 169 | 170 | def reformat_binary(start, oper_and_atoms): 171 | def parse(lhs, tokens, index=0): 172 | threshold = priority[tokens[index][0][0]] 173 | while index < len(tokens): 174 | op, rhs = tokens[index] 175 | assert(type(op) != str) 176 | op = op[0] 177 | if priority[op] < threshold: 178 | break 179 | index += 1 180 | while index < len(tokens) and\ 181 | priority[tokens[index][0][0]] > priority[op]: 182 | rhs, index = parse(rhs, tokens, index) 183 | lhs = Node("__binary__", [op, lhs, rhs], pos=(lhs.pos[0], rhs.pos[1])) 184 | return (lhs, index) 185 | if not oper_and_atoms: 186 | return start 187 | tokens = zip(oper_and_atoms[::2], oper_and_atoms[1::2]) 188 | lhs, index = start, 0 189 | while index < len(tokens): 190 | lhs, index = parse(lhs, tokens, index) 191 | return lhs 192 | 193 | def any_token(input, binary=True): 194 | ops = binary_ops if binary else expr_ops 195 | old_input = input[:] 196 | for tokens in ops: 197 | for token in tokens: 198 | if all(pop(input) == char for char in token): 199 | return token 200 | input[:] = old_input[:] 201 | return False 202 | -------------------------------------------------------------------------------- /pymetaterp/python_compiled.py: -------------------------------------------------------------------------------- 1 | from boot_compiled import * 2 | 3 | def eval_(expr): 4 | g.locals['self'] = g 5 | output = eval(expr, globals(), g.locals) 6 | del g.locals['self'] 7 | return output 8 | 9 | def lookahead(child): 10 | saved = g.input.position 11 | output = child() 12 | g.input.position = saved 13 | return output 14 | 15 | def token(s): 16 | while g.input.next() in ['\t', '\n', '\r', ' ']: 17 | pass 18 | g.input.position -= 1 19 | for char in s: 20 | if g.input.next() != char: 21 | return MatchError("Not exactly %s" % char) 22 | if char.isalpha(): 23 | top = g.input.next() 24 | if top.isalnum() or top == '_': 25 | return MatchError("Prefix matched but didn't end.") 26 | g.input.position -= 1 27 | return s 28 | 29 | def and_(children): 30 | saved = g.input.position 31 | outputs = [] 32 | output_mode = None 33 | for child in children: 34 | output = child() 35 | if isinstance(output, MatchError): 36 | g.input.position = saved 37 | return MatchError("And match failed") 38 | if output_mode: 39 | if getattr(output, "name", None) == output_mode: 40 | outputs.extend(to_list(output.children)) 41 | else: 42 | if getattr(output, "name", None) == "out": 43 | outputs = to_list(output.children) 44 | output_mode = "out" 45 | elif getattr(output, "name", None) == "rule_value": 46 | outputs = to_list(output.children) 47 | output_mode = "rule_value" 48 | else: 49 | outputs.extend(to_list(output)) 50 | return "".join(outputs) if outputs and all(type(output) == str for output in outputs) and len(outputs[0]) == 1\ 51 | else outputs 52 | 53 | # Not a rule! Should rename this node to just 'value'? 54 | def rule_value(expr): 55 | # Not normally wrapped in Node, need to rethink! 56 | return Node("rule_value", eval_(expr)) 57 | 58 | def predicate(expr): 59 | output = eval_(expr) 60 | if not output: 61 | return MatchError("Predicate evaluates to false") 62 | elif output == True: 63 | return None 64 | else: 65 | return Node("predicate", [output]) 66 | 67 | def action(expr): 68 | g.locals['self'] = g 69 | exec(expr, globals(), g.locals) 70 | del g.locals['self'] 71 | return 72 | 73 | def bound(child, (type, name)): 74 | saved = g.input.position 75 | output = child() 76 | if type == "inline": 77 | return output if isinstance(output, MatchError) else\ 78 | Node(name, output, (saved+1, g.input.position+1)) 79 | else: # bind 80 | g.locals[name] = output 81 | 82 | def apply_(name): 83 | if g.debug: 84 | print " "*g.nest, name, g.input.source[g.input.position+1: g.input.position+10] 85 | key = (name, id(g.input.source), g.input.position, tuple(g.indentation)) 86 | # Should also memoize output indentation! 87 | if key in g.memoizer: 88 | g.input.source, g.input.position = g.memoizer[key][1][:] 89 | return g.memoizer[key][0] 90 | saved_locals = g.locals 91 | g.locals = g.default_locals 92 | # func, flagged 93 | g.nest += 1 94 | saved = g.input.position 95 | output = g.rules[name][0]() 96 | g.nest -= 1 97 | if g.debug: 98 | print " "*g.nest, name, "->", output 99 | g.locals = saved_locals 100 | if (not isinstance(output, MatchError) and "!" in g.rules[name][1]) or\ 101 | (isinstance(output, list) and len(output) > 1): 102 | output = Node(name, output, (saved+1, g.input.position+1)) 103 | g.memoizer[key] = (output, [g.input.source, g.input.position]) 104 | return output 105 | 106 | def rule_void(): 107 | return 108 | 109 | def reformat_atom(atom, trailers): 110 | if trailers: 111 | bp() 112 | output = atom 113 | for trailer in trailers: 114 | pos = (output.pos[0], trailer.pos[1]) 115 | if trailer.name == "arglist": 116 | output = Node("__call__", [output, trailer], pos=pos) 117 | elif trailer.name == "NAME": 118 | output = Node("__getattr__", [output, Node("NAME", trailer, 119 | pos=trailer.pos)], pos=pos) 120 | elif trailer.name == "subscriptlist": 121 | output = Node("__getitem__", [output] + trailer, pos=pos) 122 | else: 123 | raise Exception("Unknown trailer %s" % trailer.name) 124 | return output 125 | 126 | 127 | binary_ops = ((">=", "<=", "<>", "<", ">", "==", "!=", 128 | "in", "not in", "is not", "is"), 129 | ("|",), ("^",), ("&",), ("<<", ">>"), ("+", "-"), 130 | ("*", "/", "%", "//"), ("**",)) 131 | priority = {op:i for i, ops in enumerate(binary_ops) for op in ops} 132 | expr_ops = binary_ops[1:] 133 | 134 | def reformat_binary(start, oper_and_atoms): 135 | def parse(lhs, tokens, index=0): 136 | threshold = priority[tokens[index][0][0]] 137 | while index < len(tokens): 138 | op, rhs = tokens[index] 139 | assert(type(op) != str) 140 | op = op[0] 141 | if priority[op] < threshold: 142 | break 143 | index += 1 144 | while index < len(tokens) and\ 145 | priority[tokens[index][0][0]] > priority[op]: 146 | rhs, index = parse(rhs, tokens, index) 147 | lhs = Node("__binary__", [op, lhs, rhs], pos=(lhs.pos[0], rhs.pos[1])) 148 | return (lhs, index) 149 | if not oper_and_atoms: 150 | return start 151 | tokens = zip(oper_and_atoms[::2], oper_and_atoms[1::2]) 152 | lhs, index = start[0], 0 153 | while index < len(tokens): 154 | lhs, index = parse(lhs, tokens, index) 155 | return lhs 156 | 157 | def any_token(input, binary=True): 158 | ops = binary_ops if binary else expr_ops 159 | old_input = g.input.position 160 | for tokens in ops: 161 | for token in tokens: 162 | if all(g.input.next() == char for char in token): 163 | return token 164 | g.input.position = old_input 165 | return False 166 | 167 | def match(tree, inp, debug=False, locals=None): 168 | g.rules = {'anything': (rule_anything, ''), 'letter': (rule_letter, ''), 169 | 'digit': (rule_digit, ''), 'void': (rule_void, ''),} 170 | g.indentation = [0] 171 | g.memoizer = {} 172 | g.locals = g.default_locals = {} if locals is None else dict(locals) 173 | g.nest = 0 174 | g.debug = debug 175 | exec to_python(tree) 176 | g.input = Source(inp) 177 | return rule_grammar() 178 | -------------------------------------------------------------------------------- /pymetaterp/python_grammar.py: -------------------------------------------------------------------------------- 1 | full_definition = r""" 2 | comment = ('#' {(~'\n' {anything})*})=comment 3 | hspaces = (' ' | '\t' | escaped_linebreak)* 4 | hspacesp = (' ' | '\t' | escaped_linebreak)+ 5 | escaped_linebreak = '\\' {'\n'} 6 | 7 | single_input = EMPTY_LINE | simple_stmt | (compound_stmt EMPTY_LINE) 8 | file_input = (EMPTY_LINE | SAME_INDENT stmt)* ENDMARKER 9 | eval_input = testlist NEWLINE? EMPTY_LINE* ENDMARKER 10 | 11 | decorator! = "@" {dotted_name ("(" {arglist} ")")?} NEWLINE 12 | decorators! = decorator+ 13 | decorated = decorators (classdef | funcdef) 14 | funcdef = "def" {NAME} "(" {parameters | void=parameters} ")" ":" {suite} 15 | # Check order validity elsewhere (at most one remaining_args and one kwargs) 16 | parameters! = {fpdef_opt (comma {fpdef_opt})*} comma? 17 | 18 | fpdef = NAME | "(" fplist ")" 19 | fpdef_opt = fpdef ("=" {test})? | "*" {NAME=remaining_args} | "**" {NAME=kwargs} 20 | fplist = {fpdef (comma {fpdef})*} comma? 21 | 22 | stmt = compound_stmt | simple_stmt 23 | simple_stmt = {small_stmt (";" {small_stmt})*} ";"? NEWLINE 24 | small_stmt = print_stmt | del_stmt | pass_stmt | flow_stmt | comment 25 | | import_stmt | global_stmt | exec_stmt | assert_stmt | expr_stmt 26 | 27 | expr_stmt = aug_assign | regular_assign | testlist 28 | aug_assign_symbol = "+=" | "-=" | "*=" | "/=" | "%=" | "&=" 29 | | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//=" 30 | aug_assign = testlist aug_assign_symbol=operation (yield_expr|testlist) 31 | regular_assign = testlist ("=" {yield_expr|testlist})+ 32 | # For normal assignments, additional restrictions enforced by the interpreter 33 | print_stmt! = "print" { {test ("," {test})*} ","? 34 | | ">>" test ( ("," test)+ ","? )? | void} 35 | del_stmt! = "del" hspacesp {exprlist} 36 | pass_stmt! = "pass" {} 37 | flow_stmt = break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt 38 | break_stmt! = "break" {} 39 | continue_stmt! = "continue" {} 40 | return_stmt! = "return" {testlist?} 41 | yield_stmt = yield_expr 42 | raise_stmt! = "raise" {(test ("," test ("," test))?)?} 43 | import_stmt = simport_stmt | import_name | import_from 44 | simport_stmt! = "simport" {NAME} 45 | import_name = "import" {import_names} 46 | import_names! = dotted_as_name ("," {dotted_as_name})* 47 | import_from! = "from" {"."* dotted_name | "."+} 48 | "import" {"*"=import_all | "(" {import_as_names} ")" | import_as_names} 49 | import_as_name = NAME ("as" {NAME})? 50 | dotted_as_name = dotted_name ("as" {NAME})? 51 | import_as_names! = {import_as_name ("," {import_as_name})*} ","? 52 | dotted_name = NAME ("." {NAME})* 53 | global_stmt = "global" NAME ("," NAME)* 54 | exec_stmt! = "exec" {expr ("in" {test} ("," {test})?)?} 55 | assert_stmt! = "assert" {test ("," test)?} 56 | 57 | compound_stmt = if_stmt | while_true_stmt=while_true | while_stmt 58 | | simple_for_stmt | for_stmt | try_stmt | with_stmt 59 | | funcdef | classdef | decorated 60 | if_stmt = ("if" {test} ":" {suite})=single_if 61 | ((SAME_INDENT "elif" {test} ":" {suite})=single_if)* 62 | ((SAME_INDENT "else" ":" {void=gen_true suite})=single_if)? 63 | while_true_stmt = "while_true" ":" {suite} 64 | while_stmt = "while" {test} ":" {suite (SAME_INDENT "else" ":" {suite})?} 65 | for_stmt = "for" {exprlist} "in" {testlist} ":" {suite} {(SAME_INDENT "else" ":" {suite})?} 66 | simple_for_stmt = "simple_for" {exprlist} "in" {testlist} ":" {suite} 67 | try_stmt! = "try" ":" {suite} 68 | {((SAME_INDENT {exception} ":" {suite})=except_clause)+=except_clauses 69 | (SAME_INDENT "else" ":" suite)? 70 | (SAME_INDENT "finally" ":" suite)? 71 | | SAME_INDENT "finally" ":" suite} 72 | with_stmt = "with" with_item ("," with_item)* ":" suite 73 | with_item = test ("as" expr)? 74 | # NB compile.c makes sure that the default except clause is last 75 | exception! = "except" {(test (("as" | ",") {test})?)?} 76 | # Should "give back" the consumed empty lines at the end! 77 | suite = NEWLINE INDENT {(SAME_INDENT stmt | EMPTY_LINE)+} DEDENT 78 | | simple_stmt 79 | 80 | testlist = {test ("," {test})*} ","? 81 | yield_expr! = "yield" {testlist?} 82 | 83 | test = lambdef | or_test ("if" {or_test} {("else" {test})?})? 84 | or_test = and_test ("or" {and_test})* 85 | and_test = not_test ("and" {not_test})* 86 | not_test = ("not" {not_test})=not_test | comparison 87 | 88 | comparison = factor:start (hspaces {?(any_token(self.input))} 89 | hspaces {factor})*:oper_and_atoms 90 | -> reformat_binary(start, oper_and_atoms) 91 | expr = factor:start (hspaces {?(any_token(self.input, binary=False))} 92 | hspaces {factor})*:oper_and_atoms 93 | -> reformat_binary(start, oper_and_atoms) 94 | 95 | factor = ("+"|"-"|"~")* power 96 | power = trailed_atom ("**" factor)? 97 | trailed_atom = atom:atom trailer*:trailers -> reformat_atom(atom, trailers) 98 | atom = "(" spaces {parenthesis} spaces ")" 99 | | "[" spaces {listmaker | void=listmaker} spaces "]" 100 | | "{" spaces {dictmaker} spaces "}" 101 | | "{" {setmaker} spaces "}" 102 | | "`" {(stmt | small_stmt)=thunk} "`" 103 | | STRINGS | NAME | NUMBER 104 | parenthesis = yield_expr | testlist_comp=generator | tuple 105 | | test | void=no_param 106 | listmaker! = (test list_for list_iter*)=listcomp 107 | | {test (comma {test})*} comma? 108 | testlist_comp = test list_for list_iter* 109 | tuple! = ({test} comma)+ test? 110 | lambdef! = "lambda" {parameters? | void=parameters} ":" {test} 111 | trailer = "(" spaces {arglist} spaces ")" 112 | | "[" spaces {subscriptlist=subscriptlist} spaces "]" 113 | | "." {NAME} 114 | subscriptlist! = subscript ("," {subscript})* ","? 115 | subscript! = "..."=ellipsis | ({test?=start} ":" {test?=stop} {step?})=slice | test 116 | exprlist = {expr ("," {expr})*} ","? 117 | step! = ":" {test?} 118 | dictmaker! = ({test} ":" {test} {list_for} {list_iter*})=dictcomp 119 | | {({test} ":" {test})=pair ((comma {test} ":" {test})=pair)*} comma? 120 | | void 121 | 122 | setmaker! = test (list_for list_iter* | (("," test)* ","?)) 123 | 124 | classdef = "class" {NAME} {("(" {testlist?} ")")?=parents} ":" {suite} 125 | 126 | arglist! = ({argument} comma)* ( "**" {test=kwargs} 127 | | "*" {test=remaining_args ("," keyword_arg)* ("," "**" {test=kwargs})?} 128 | | {argument | void} ) 129 | comma? 130 | 131 | comma = "," spaces 132 | 133 | argument = keyword_arg | listcomp_arg 134 | keyword_arg = {test} "=" {test} 135 | listcomp_arg = test (list_for list_iter*)? 136 | 137 | list_iter = list_for | list_if 138 | list_for = spaces "for" {exprlist} "in" {or_test} # {testlist_safe} 139 | list_if! = spaces "if" {or_test} 140 | 141 | testlist_safe = or_test ((',' or_test)+ ','?)? 142 | testlist1 = test ("," test)* 143 | 144 | NUMBER! = hspaces digit+:s -> int("".join(n[0] for n in s)) 145 | # Probably need to check that the result isn't a reserved word. 146 | NAME! = hspaces {((letter | '_') (letter | digit | '_')*)} 147 | STRINGS = {STRING | RAW_STRING=STRING} (spaces {STRING | RAW_STRING=STRING})* 148 | STRING! = hspaces stype? '"' '"' '"' {(escaped_char | ~('"' '"' '"') {anything})*} '"' '"' '"' 149 | | hspaces stype? '\'' {(escaped_char | ~'\'' anything)*} '\'' 150 | | hspaces stype? '"' {(escaped_char | ~'"' anything)*} '"' 151 | RAW_STRING = hspaces 'r' '"' '"' '"' {(~('"' '"' '"') {anything})*} '"' '"' '"' 152 | | hspaces 'r' '\'' {(~'\'' anything)*} '\'' 153 | | hspaces 'r' '"' {(~'"' anything)*} '"' 154 | stype! = 'b' 155 | escaped_char! = '\\' {'n'|'r'|'t'|'b'|'f'|'"'|'\''|'\\'} 156 | EMPTY_LINE = (hspaces comment? ('\n' | '\r'))=EMPTY_LINE 157 | NEWLINE = hspaces (comment hspaces)? ('\n' | '\r') 158 | SAME_INDENT = hspaces:s ?(self.indentation[-1] == (len(s) if s != None else 0)) 159 | ENDMARKER = ~anything 160 | INDENT = ~~hspaces:s !(self.indentation.append(len(s) if s != None else 0)) 161 | DEDENT = !(self.indentation.pop()) 162 | 163 | grammar = file_input 164 | """ 165 | 166 | extra = """ 167 | letter = 'a'|'b'|'c'|'d'|'e'|'f'|'g'|'h'|'i'|'j'|'k'|'l'|'m'|'n'|'o'|'p'|'q'|'r'|'s'|'t'|'u'|'v'|'w'|'x'|'y'|'z'|'A'|'B'|'C'|'D'|'E'|'F'|'G'|'H'|'I'|'J'|'K'|'L'|'M'|'N'|'O'|'P'|'Q'|'R'|'S'|'T'|'U'|'V'|'W'|'X'|'Y'|'Z' 168 | digit = '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9' 169 | space = '\t'|'\n'|'\r'|' '|comment 170 | spaces = space* 171 | """ 172 | -------------------------------------------------------------------------------- /pymetaterp/util.py: -------------------------------------------------------------------------------- 1 | def simple_wrap_tree(root): 2 | if type(root) != list: 3 | return root 4 | return Node(root[0], map(simple_wrap_tree, root[1:])) 5 | 6 | class MatchError(Exception): 7 | pass 8 | 9 | class Node(list): 10 | def __init__(self, name=None, value=None, params=None, **kw): 11 | list.__init__(self, value if value is not None else []) 12 | self.name = name 13 | self.params = params if params is not None else {} 14 | for key, value in kw.items(): 15 | setattr(self, key, value) 16 | 17 | def __repr__(self): 18 | return "%s%s" % (self.name, list.__repr__(self)) 19 | 20 | def pprint(self, max_depth=None, max_width=None, indent=0, filter=None): 21 | if max_depth and indent/2 > max_depth: 22 | return 23 | print_node = bool(filter is None or filter(self)) 24 | if print_node: 25 | print " "*indent + self.name 26 | for child in self: 27 | if not hasattr(child, "pprint"): 28 | if print_node: 29 | print "%s%s %s" % (" "*(indent + 2), type(child).__name__, 30 | repr(child)) 31 | else: 32 | child.pprint(max_depth, max_width, indent + 2*print_node, filter) 33 | 34 | def save(self, filename="tree.py"): 35 | from pprint import pprint 36 | f = open(filename, "w") 37 | f.write("tree = ") 38 | pprint(self.to_list(), f) 39 | 40 | def to_list(self): 41 | return [self.name] + [elem.to_list() if hasattr(elem, "name") else elem 42 | 43 | for elem in self] 44 | 45 | def to_lisp(self): 46 | return "(%s)" % " ".join([self.name] +\ 47 | [elem.to_lisp() if hasattr(elem, "name") else\ 48 | repr(elem).replace("'", '"') if elem != '"' else '"\\""' 49 | for elem in self]) 50 | 51 | @property 52 | def descendants(self): 53 | for child in self: 54 | if type(child) == Node: 55 | for gc in child.descendants: 56 | yield gc 57 | yield child 58 | 59 | def compare_trees(t1, t2, indices): 60 | for ind in indices: 61 | t1 = t1[ind] 62 | t2 = t2[ind] 63 | return [equal_trees(x, y) for x,y in zip(t1, t2)] 64 | 65 | def equal_trees(t1, t2): 66 | if type(t1) != Node or type(t2) != Node: 67 | return t1 == t2 68 | return type(t1) == type(t2) and t1.name == t2.name and\ 69 | all(equal_trees(c1, c2) for c1, c2 in zip(t1, t2) if type(t1) == Node) 70 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # pymetaterp 2 | 3 | This is a python AST builder that uses no Python modules. A longer stackless version is available for easier porting. `single_file.py` is a stand-alone 502 lines script. 4 | 5 | Its (also) just another parsing expression grammar (PEG) based parser with one major difference. The parsed grammar is interpreted instead of compiled. This makes it easy to modify the language (by editing its grammar) *as well as* the language that grammar is written in (and the language of *that* grammar). 6 | 7 | [This is a **pre-release** of sorts. There are probably some errors and missing information.] 8 | 9 | ## Download and run 10 | 11 | git clone https://github.com/asrp/pymetaterp 12 | cd pymetaterp 13 | python single_file.py 14 | 15 | or 16 | 17 | python single_file.py filename.py 18 | 19 | This will print out the AST of the given file (or `single_file.py`'s own AST). Sample beginning of the output: 20 | 21 | file_input 22 | regular_assign 23 | testlist 24 | NAME 25 | str 'NAME' 26 | NAME 27 | str 'FLAGS' 28 | 29 | To run files from the library 30 | 31 | python test/boot_test.py 32 | python test/python_parse_test.py 33 | 34 | ## Files 35 | 36 | `single_file.py` is mainly for demonstration. This module is otherwise separated into files. There are many files but they are mostly separate. The import dependencies is 37 | 38 | util.py 39 | boot.py 40 | boot_stackless.py 41 | python.py 42 | 43 | Other files have no imports. To get something useful, you'll have to import multiple files. See `test/python_parse_test.py` and `test/boot_test.py` for some examples. 44 | 45 | ## Repl 46 | 47 | An obvious thing *missing* is the grammar read-eval-print loop (repl) so the interpreter can be fed one rule at a time, parsing subsequence input using the rules seen so far. 48 | 49 | ## Source reading order 50 | 51 | I'd suggest reading `boot.py` and `bootstrap` in `boot_grammar.py` first. The two form the core and together with `boot_tree.py`, they can regenerate `boot_tree`. 52 | 53 | Then `boot_stackless` is the same as `boot.py` but doesn't use the Python call stack/recursion for parsing. 54 | 55 | `python.py` adds functionality to the `boot.py` interpreter. `diff` in `boot_grammar.py` adds the syntax for those. 56 | 57 | Finally, `python_grammar.py` contains the python grammar to be finally parsed. 58 | 59 | ## Python language parsed 60 | 61 | The module builds the AST for Python 2.x programs. It is able to parse all of Python 2.x (in fact, it contains a slightly modified version of the Python 2.x grammar) but is less lenient with whitespaces. For example, parsing 62 | 63 | from my_module import (var1, var2, 64 | var3, var4) 65 | 66 | gives an error. 67 | 68 | *Since this is a pre-release, there are likely bugs with parts of the language I don't use so often. It _can_ build the AST for all files included here.* 69 | 70 | ## Gramamr language differences 71 | 72 | The beginning of `boot_grammar.py` self-describes the grammar. Its a PEG so all "or" (`|`) returns the first match and "and" and "quantified" (`*, +, ?`) are greedy. 73 | 74 | name = (letter | '_') (letter | digit | '_')* 75 | expr = apply | exactly | token | parenthesis | output 76 | 77 | exactly! = "'" {(escaped_char | ~'\'' anything)*} "'" 78 | token! = "\"" {(escaped_char | ~'"' anything)*} "\"" 79 | escaped_char! = '\\' {'n'|'r'|'t'|'b'|'f'|'"'|'\''|'\\'} 80 | apply! = ('\t'|' ')* {name} 81 | parenthesis = "(" {or} ")" 82 | output! = "{" {or} "}" 83 | 84 | not = "~" {expr=negation} | expr 85 | quantified = not (('*' | '+' | '?')=quantifier)? 86 | bound = quantified ('=' {name=inline})? 87 | and = bound* 88 | or = and ("|" {and})* 89 | 90 | rule = spaces {name=rule_name '!'?=flags and=args ("=" {or})} 91 | grammar = {rule*} spaces 92 | 93 | The main difference from other PEG. 94 | 95 | - output rule: `a {b c} d` will match the concatenation of `a b c d` but only return what matched `b c`. 96 | - quantifier collapse: `letter letter*` returns a list rather than a pair with the second element being a list matching `letter*`. 97 | - nested and collapse: `a (b (c d)) e` has the same output as `a b c d e` (see inline below if some pairs need to be explicitly grouped). 98 | - node collapsing: nodes in the output with only one child are replaced by their parent, unless the `!` ("don't collapse") flag is set for that node. 99 | - inline: shamelessly taken [Ohm](https://github.com/ohm) but with a slightly different interpretation. `expression=name` creates a node named `name` wrapping the output of `expression`. 100 | - rule replacement: having a second `rule_name = expression` line replaces the first definition of `rule_name` (instead of appending into an or). 101 | - two basic tokens: there are two basic token types: `'a'` (single quote) and `"a"` (double quote). The double quoted token allows whitespace before matching. 102 | 103 | ## Regenerating boot_tree.py 104 | 105 | Create some tree `match_tree` using `Interpreter.match` and call `save` on the result. 106 | 107 | match_tree.save("tree.py") 108 | 109 | ## Left recursion 110 | 111 | While "[PEG/packrat parsers can support left-recursion]((http://www.vpri.org/pdf/tr2007002_packrat.pdf))", the tree output isn't the one we want. The python functions `reformat_binary` and `reformat_atom` fixes a parsed tree's ouput. 112 | 113 | ## Source oddities 114 | 115 | ### Two hard-coded rules 116 | 117 | if root[NAME] == "anything": 118 | return pop(self.input) 119 | elif root[NAME] == "void": 120 | return 121 | 122 | ### Hard-coded semantics for tokens 123 | 124 | if name == "token": 125 | while pop(self.input) in self.whitespace: 126 | if self.input[0][self.input[1]] == '\\': 127 | pop(self.input) 128 | self.input[1] -= 1 129 | if name == "token" and root[0].isalpha(): 130 | top = pop(self.input) 131 | if top.isalnum() or top == '_': 132 | raise MatchError("Prefix matched but didn't end.") 133 | self.input[1] -= 1 134 | 135 | ## Optimization 136 | 137 | Some effort were made to make these files short (especially `single_file.py`) but not too much. There are still some asserts around and commented print statements that can be useful for debugging. The final goal is, of course, to reduce the program's complexity and verbosity, not its line count. 138 | 139 | ## Missing features 140 | 141 | Features/bloat from a longer version of this program not (yet?) moved over: 142 | 143 | - Debugging tree of nodes visited and their input and output 144 | - Function arguments (its in the grammar but not the interpreter) 145 | - Nested list inputs (its also in the grammar but not the interpreter) 146 | - name, args, flags, body as parameters instead of positional children 147 | - ~~Memoization~~ 148 | - ~~Matched input start and end positions~~ 149 | - Exact python expression matching for predicate, action and rule value. `balanced` is used as a simpler heuristic for now. 150 | 151 | ## Removing features 152 | 153 | To get a smaller file with just the basics. 154 | 155 | patch -R pymetaterp/python.py < patches/python_pos.patch 156 | patch -R pymetaterp/python.py < patches/python_memoizer.patch 157 | patch -R pymetaterp/boot_stackless.py < patches/boot_pos.patch 158 | patch -R pymetaterp/boot_stackless.py < patches/boot_memoizer.patch 159 | 160 | ## Readings 161 | 162 | - [Ometa](http://www.tinlizzie.org/ometa/) - Warth's thesis reads very well. 163 | - [PEG and packrat parser](http://bford.info/packrat/) 164 | - [Packrat Parsers Can Support Left Recursion](http://www.vpri.org/pdf/tr2007002_packrat.pdf) 165 | 166 | ## Other similar projects 167 | 168 | - [parsimonious](https://github.com/erikrose/parsimonious) 169 | - [Pymeta](https://pypi.python.org/pypi/PyMeta/) 170 | - [pyparsing](http://pyparsing.wikispaces.com/) 171 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | setup(name='pymetaterp', 3 | version='1.0', 4 | description='A python parser that builds python ASTs in 502 lines of python without using modules', 5 | url='https://github.com/asrp/pymetaterp', 6 | author='asrp', 7 | author_email='asrp@email.com', 8 | packages=['pymetaterp'], 9 | keywords='parser peg python minimal') 10 | -------------------------------------------------------------------------------- /single_file.py: -------------------------------------------------------------------------------- 1 | NAME, FLAGS, ARGS, BODY = [0, 1, 2, 3] 2 | inf = float("inf") 3 | 4 | class MatchError(Exception): 5 | pass 6 | 7 | class Node(list): 8 | def __init__(self, name=None, value=None): 9 | list.__init__(self, value if value is not None else []) 10 | self.name = name 11 | 12 | def __repr__(self): 13 | return "%s%s" % (self.name, list.__repr__(self)) 14 | 15 | def pprint(self, indent=0): 16 | print " "*indent + self.name 17 | for child in self: 18 | if not hasattr(child, "pprint"): 19 | print " "*(indent + 1), type(child).__name__, repr(child) 20 | else: 21 | child.pprint(indent + 2) 22 | 23 | def simple_wrap_tree(root): 24 | if type(root) != list: 25 | return root 26 | return Node(root[0], map(simple_wrap_tree, root[1:])) 27 | 28 | def pop(input): 29 | input[1] += 1 30 | try: 31 | return input[0][input[1]] 32 | except IndexError: 33 | raise MatchError("EOF") 34 | 35 | def to_list(output): 36 | return output if getattr(output, "name", None) == "And" else\ 37 | [] if output is None else\ 38 | [output] 39 | 40 | def to_node(outputs): 41 | outputs = [elem for output in outputs for elem in to_list(output)] 42 | return outputs[0] if len(outputs) == 1 else\ 43 | None if len(outputs) == 0 else\ 44 | "".join(outputs) if all(type(output) == str for output in outputs)\ 45 | else Node("And", outputs) 46 | 47 | class Interpreter: 48 | def __init__(self, grammar_tree, whitespace="\t\n\r \\"): 49 | self.rules = {rule[NAME][0]:rule for rule in grammar_tree} 50 | self.whitespace = whitespace 51 | 52 | def eval(self, root): 53 | self.locals['self'] = self 54 | output = eval(root, globals(), self.locals) 55 | del self.locals['self'] 56 | return output 57 | 58 | def match(self, root, new_input=None, new_pos=-1): 59 | """ >>> g.match(g.rules['grammar'][-1], "x='y'") """ 60 | if new_input is not None: 61 | self.input = [new_input, new_pos] 62 | self.indentation = [0] 63 | self.locals = {} 64 | old_input = self.input[:] 65 | name = root.name 66 | if name in ["and", "args", "output"]: 67 | outputs = [self.match(child) for child in root] 68 | if any(child.name == "output" for child in root): 69 | outputs = [output for child, output in zip(root, outputs) 70 | if child.name == "output"] 71 | elif any(child.name == "rule_value" for child in root): 72 | outputs = [output for child, output in zip(root, outputs) 73 | if child.name == "rule_value"] 74 | assert(len(outputs) == 1) 75 | elif name == "quantified": 76 | assert(root[1].name == "quantifier") 77 | lower, upper = {"*": (0, inf), "+": (1, inf), "?": (0, 1)}[root[1][0]] 78 | outputs = [] 79 | while len(outputs) < upper: 80 | last_input = self.input[:] 81 | try: 82 | outputs.append(self.match(root[0])) 83 | except MatchError: 84 | self.input = last_input[:] 85 | break 86 | if lower > len(outputs): 87 | raise MatchError("Matched %s < %s times" % (len(outputs), lower)) 88 | elif name == "or": 89 | for child in root: 90 | try: 91 | return self.match(child) 92 | except MatchError: 93 | self.input = old_input[:] 94 | raise MatchError("All Or matches failed") 95 | elif name in ["exactly", "token"]: 96 | if name == "token": 97 | while pop(self.input) in self.whitespace: 98 | if self.input[0][self.input[1]] == '\\': 99 | pop(self.input) 100 | self.input[1] -= 1 101 | for char in root[0]: 102 | if pop(self.input) != char: 103 | raise MatchError("Not exactly %s" % root[0]) 104 | if name == "token" and root[0].isalpha(): 105 | top = pop(self.input) 106 | if top.isalnum() or top == '_': 107 | raise MatchError("Prefix matched but didn't end.") 108 | self.input[1] -= 1 109 | return root[0] 110 | elif name == "apply": 111 | #import inspect 112 | #print " "*(len(inspect.stack())-9), "matching", name, root[NAME], self.input[1], self.input[0][self.input[1]+1:self.input[1]+11] 113 | if root[NAME] == "anything": 114 | return pop(self.input) 115 | elif root[NAME] == "void": 116 | return 117 | old_locals = self.locals 118 | self.locals = {} 119 | try: 120 | outputs = self.match(self.rules[root[NAME]][BODY]) 121 | finally: 122 | self.locals = old_locals 123 | if root[NAME] == "escaped_char": 124 | chars = dict(["''", '""', "t\t", "n\n", "r\r", "b\b", "f\f", "\\\\"]) 125 | return chars[outputs[-1]] 126 | and_node = getattr(outputs, "name", None) == "And" 127 | make_node = "!" in self.rules[root[NAME]][FLAGS] or\ 128 | (and_node and len(outputs) > 1) 129 | if not make_node: 130 | return outputs 131 | return Node(root[NAME], to_list(outputs)) 132 | elif name in "bound": 133 | if root[1].name == "inline": 134 | return Node(root[1][0], to_list(self.match(root[0]))) 135 | else: # bind 136 | self.locals[root[1][0]] = self.match(root[0]) 137 | return 138 | elif name == "negation": 139 | try: 140 | self.match(root[0]) 141 | except MatchError: 142 | self.input = old_input 143 | return None 144 | raise MatchError("Negation true") 145 | elif name == "rule_value": 146 | return self.eval(root[0]) 147 | elif name == "predicate": 148 | output = self.eval(root[0]) 149 | if not output: 150 | raise MatchError("Predicate evaluates to false") 151 | return None if output == True else Node("predicate", [output]) 152 | elif name == "action": 153 | self.locals['self'] = self 154 | exec(root[0], globals(), self.locals) 155 | del self.locals['self'] 156 | return 157 | elif name == "lookahead": 158 | output = self.match(root[0]) 159 | self.input = old_input[:] 160 | return output 161 | else: 162 | raise Exception("Unknown operator %s" % name) 163 | return to_node(outputs) 164 | 165 | def reformat_atom(atom, trailers): 166 | output = atom 167 | for trailer in to_list(trailers): 168 | if trailer.name == "arglist": 169 | output = Node("__call__", [output, trailer]) 170 | elif trailer.name == "NAME": 171 | output = Node("__getattr__", [output, Node("NAME", trailer)]) 172 | elif trailer.name == "subscriptlist": 173 | output = Node("__getitem__", [output] + trailer) 174 | else: 175 | raise Exception("Unknown trailer %s" % trailer.name) 176 | return output 177 | 178 | binary_ops = ((">=", "<=", "<>", "<", ">", "==", "!=", 179 | "in", "not in", "is not", "is"), 180 | ("|",), ("^",), ("&",), ("<<", ">>"), ("+", "-"), 181 | ("*", "/", "%", "//"), ("**",)) 182 | priority = {op:i for i, ops in enumerate(binary_ops) for op in ops} 183 | expr_ops = binary_ops[1:] 184 | 185 | def reformat_binary(start, tokens): 186 | def parse(lhs, tokens, index=0): 187 | threshold = priority[tokens[index][0][0]] 188 | while index < len(tokens): 189 | op, rhs = tokens[index] 190 | op = op[0] 191 | if priority[op] < threshold: 192 | break 193 | index += 1 194 | while index < len(tokens) and\ 195 | priority[tokens[index][0][0]] > priority[op]: 196 | rhs, index = parse(rhs, tokens, index) 197 | lhs = Node("__binary__", [op, lhs, rhs]) 198 | return (lhs, index) 199 | if not tokens: 200 | return start 201 | tokens = zip(tokens[::2], tokens[1::2]) 202 | lhs, index = start, 0 203 | while index < len(tokens): 204 | lhs, index = parse(lhs, tokens, index) 205 | return lhs 206 | 207 | def any_token(input, binary=True): 208 | ops = binary_ops if binary else expr_ops 209 | old_input = input[:] 210 | for tokens in ops: 211 | for token in tokens: 212 | if all(pop(input) == char for char in token): 213 | return token 214 | input[:] = old_input[:] 215 | return False 216 | 217 | grammar = r""" 218 | expr = apply | exactly | token | parenthesis | output | list 219 | | rule_value | predicate | action 220 | 221 | exactly! = "'" {(escaped_char | ~'\'' anything)*} "'" 222 | token! = "\"" {(escaped_char | ~'"' anything)*} "\"" 223 | apply! = indentation? {name ('(' {balanced=args} ')')?} 224 | parenthesis = "(" {or} ")" 225 | output! = "{" {or} "}" 226 | list! = "[" {or} "]" 227 | predicate! = "?(" {balanced} ')' 228 | action! = "!(" {balanced} ')' 229 | rule_value! = "->" hspaces {(escaped_char | ~'\n' anything)*} 230 | 231 | not = "~" "~" {expr=lookahead} | "~" {expr=negation} | expr 232 | quantified = not (('*' | '+' | '?')=quantifier)? 233 | bound = ":" {name=bind} 234 | | quantified (':' {name=bind} | '=' {name=inline})? 235 | and = bound* 236 | or = and ("|" {and})* 237 | 238 | rule = spaces {name=rule_name '!'?=flags and=args ("=" {or})} 239 | grammar = {rule*} spaces 240 | 241 | comment = '#' (~'\n' anything)* 242 | indentation = (hspaces ('\r' '\n' | '\r' | '\n'))* hspacesp 243 | name = (letter | '_') (letter | digit | '_')* 244 | balanced = (escaped_char | '(' balanced ')' | ~')' anything)* 245 | """ 246 | 247 | python_grammar = r""" 248 | single_input = EMPTY_LINE | simple_stmt | (compound_stmt EMPTY_LINE) 249 | file_input = (EMPTY_LINE | SAME_INDENT stmt)* ENDMARKER 250 | eval_input = testlist NEWLINE? EMPTY_LINE* ENDMARKER 251 | 252 | decorator! = "@" {dotted_name ("(" {arglist} ")")?} NEWLINE 253 | decorators! = decorator+ 254 | decorated = decorators (classdef | funcdef) 255 | funcdef = "def" {NAME} "(" {parameters | void=parameters} ")" ":" {suite} 256 | # Check order validity elsewhere (at most one remaining_args and one kwargs) 257 | parameters! = {fpdef_opt (comma {fpdef_opt})*} comma? 258 | 259 | fpdef = NAME | "(" fplist ")" 260 | fpdef_opt = fpdef ("=" {test})? | "*" {NAME=remaining_args} | "**" {NAME=kwargs} 261 | fplist = {fpdef (comma {fpdef})*} comma? 262 | 263 | stmt = compound_stmt | simple_stmt 264 | simple_stmt = {small_stmt (";" {small_stmt})*} ";"? NEWLINE 265 | small_stmt = print_stmt | del_stmt | pass_stmt | flow_stmt | comment 266 | | import_stmt | global_stmt | exec_stmt | assert_stmt | expr_stmt 267 | 268 | expr_stmt = aug_assign | regular_assign | testlist 269 | aug_assign_symbol = "+=" | "-=" | "*=" | "/=" | "%=" | "&=" 270 | | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//=" 271 | aug_assign = testlist aug_assign_symbol=operation (yield_expr|testlist) 272 | regular_assign = testlist ("=" {yield_expr|testlist})+ 273 | # For normal assignments, additional restrictions enforced by the interpreter 274 | print_stmt! = "print" { {test ("," {test})*} ","? 275 | | ">>" test ( ("," test)+ ","? )? | void} 276 | del_stmt! = "del" hspacesp {exprlist} 277 | pass_stmt! = "pass" {} 278 | flow_stmt = break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt 279 | break_stmt! = "break" {} 280 | continue_stmt! = "continue" {} 281 | return_stmt! = "return" {testlist?} 282 | yield_stmt = yield_expr 283 | raise_stmt! = "raise" {(test ("," test ("," test))?)?} 284 | import_stmt = import_name | import_from 285 | import_name = "import" {import_names} 286 | import_names! = dotted_as_name ("," {dotted_as_name})* 287 | import_from! = "from" {"."* dotted_name | "."+} 288 | "import" {"*" | "(" {import_as_names} ")" | import_as_names} 289 | import_as_name = NAME ("as" {NAME})? 290 | dotted_as_name = dotted_name ("as" {NAME})? 291 | import_as_names! = {import_as_name ("," {import_as_name})*} ","? 292 | dotted_name = NAME ("." {NAME})* 293 | global_stmt = "global" NAME ("," NAME)* 294 | exec_stmt! = "exec" {expr ("in" {test} ("," {test})?)?} 295 | assert_stmt! = "assert" {test ("," test)?} 296 | 297 | compound_stmt = if_stmt | while_stmt | for_stmt | try_stmt | with_stmt 298 | | funcdef | classdef | decorated 299 | if_stmt = ("if" {test} ":" {suite})=single_if 300 | (("elif" {test} ":" {suite})=single_if)* 301 | (("else" ":" {void=gen_true suite})=single_if)? 302 | while_stmt = "while" {test} ":" {suite ("else" ":" {suite})?} 303 | for_stmt = "for" {exprlist} "in" {testlist} ":" {suite} {{"else"} ":" {suite=elseblock}}? 304 | try_stmt! = "try" ":" {suite} 305 | {(({exception} ":" {suite})=except_clause)+=except_clauses 306 | ("else" ":" suite)? 307 | ("finally" ":" suite)? 308 | | "finally" ":" suite} 309 | with_stmt = "with" with_item ("," with_item)* ":" suite 310 | with_item = test ("as" expr)? 311 | exception! = "except" {(test (("as" | ",") {test})?)?} 312 | suite = NEWLINE INDENT {(SAME_INDENT stmt | EMPTY_LINE)+} DEDENT | simple_stmt 313 | 314 | testlist = {test ("," {test})*} ","? 315 | yield_expr! = "yield" {testlist?} 316 | 317 | test = lambdef | or_test ("if" {or_test} {("else" {test})?})? 318 | or_test = and_test ("or" {and_test})* 319 | and_test = not_test ("and" {not_test})* 320 | not_test = ("not" {not_test})=not_test | comparison 321 | 322 | comparison = factor:start (hspaces {?(any_token(self.input))} 323 | hspaces {factor})*:oper_and_atoms 324 | -> reformat_binary(start, oper_and_atoms) 325 | expr = factor:start (hspaces {?(any_token(self.input, binary=False))} 326 | hspaces {factor})*:oper_and_atoms 327 | -> reformat_binary(start, oper_and_atoms) 328 | 329 | factor = ("+"|"-"|"~")* power 330 | power = trailed_atom ("**" factor)? 331 | trailed_atom = atom:atom trailer*:trailers -> reformat_atom(atom, trailers) 332 | atom = "(" spaces {parenthesis} spaces ")" 333 | | "[" spaces {listmaker | void=listmaker} spaces "]" 334 | | "{" spaces {dictmaker} spaces "}" 335 | | "{" {setmaker} spaces "}" 336 | | "`" {(stmt | small_stmt)=thunk} "`" 337 | | STRINGS | NAME | NUMBER 338 | parenthesis = yield_expr | testlist_comp=generator | tuple 339 | | test | void=no_param 340 | listmaker! = (test list_for list_iter*)=listcomp 341 | | {test (comma {test})*} comma? 342 | testlist_comp = test list_for list_iter* 343 | tuple! = ({test} comma)+ test? 344 | lambdef! = "lambda" {parameters? | void=parameters} ":" {test} 345 | trailer = "(" spaces {arglist} spaces ")" 346 | | "[" spaces {subscriptlist} spaces "]" 347 | | "." {NAME} 348 | subscriptlist! = subscript=subscript ("," subscript=subscript)* ","? 349 | subscript = "..." | ({test?=start} ":" {test?=stop} {step?})=slice | test 350 | exprlist = {expr ("," {expr})*} ","? 351 | step! = ":" {test?} 352 | dictmaker! = ({test} ":" {test} {list_for} {list_iter*})=dictcomp 353 | | {({test} ":" {test})=pair ((comma {test} ":" {test})=pair)*} comma? 354 | | void 355 | 356 | setmaker! = test (list_for list_iter* | (("," test)* ","?)) 357 | 358 | classdef = "class" {NAME} {("(" {testlist?} ")")?=parents} ":" {suite} 359 | 360 | arglist! = ({argument} comma)* ( "**" {test=kwargs} 361 | | "*" {test=remaining_args ("," keyword_arg)* 362 | ("," "**" {test=kwargs})?} 363 | | {argument | void} ) 364 | comma? 365 | comma = "," spaces 366 | 367 | argument = keyword_arg | listcomp_arg 368 | keyword_arg = {test} "=" {test} 369 | listcomp_arg = test (list_for list_iter*)? 370 | 371 | list_iter = list_for | list_if 372 | list_for = spaces "for" {exprlist} "in" {or_test} # {testlist_safe} 373 | list_if! = spaces "if" {or_test} 374 | 375 | testlist_safe = or_test ((',' or_test)+ ','?)? 376 | testlist1 = test ("," test)* 377 | 378 | comment! = '#' {(~'\n' {anything})*} 379 | NUMBER! = hspaces digit+:s -> int("".join(n[0] for n in s)) 380 | # Probably need to check that the result isn't a reserved word. 381 | NAME! = hspaces {((letter | '_') (letter | digit | '_')*)} 382 | STRINGS = STRING (spaces {STRING})* 383 | STRING! = hspaces stype? '"' '"' '"' {(escaped_char | ~('"' '"' '"') {anything})*} '"' '"' '"' 384 | | hspaces stype? '\'' {(escaped_char | ~'\'' anything)*} '\'' 385 | | hspaces stype? '"' {(escaped_char | ~'"' anything)*} '"' 386 | stype! = 'r'|'b' 387 | EMPTY_LINE = (hspaces comment? ('\n' | '\r'))=EMPTY_LINE 388 | NEWLINE = hspaces (comment hspaces)? ('\n' | '\r') 389 | SAME_INDENT = hspaces:s ?(self.indentation[-1] == (len(s) if s else 0)) 390 | ENDMARKER = ~anything 391 | INDENT = ~~hspaces:s !(self.indentation.append(len(s) if s else 0)) 392 | DEDENT = !(self.indentation.pop()) 393 | """ 394 | 395 | extra = r""" 396 | escaped_char! = '\\' {'n'|'r'|'t'|'b'|'f'|'"'|'\''|'\\'} 397 | letter = 'a'|'b'|'c'|'d'|'e'|'f'|'g'|'h'|'i'|'j'|'k'|'l'|'m'|'n'|'o'|'p'|'q'|'r'|'s'|'t'|'u'|'v'|'w'|'x'|'y'|'z'|'A'|'B'|'C'|'D'|'E'|'F'|'G'|'H'|'I'|'J'|'K'|'L'|'M'|'N'|'O'|'P'|'Q'|'R'|'S'|'T'|'U'|'V'|'W'|'X'|'Y'|'Z' 398 | digit = '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9' 399 | hspaces = (' ' | '\t' | escaped_linebreak)* 400 | hspacesp = (' ' | '\t' | escaped_linebreak)+ 401 | escaped_linebreak = '\\' {'\n'} 402 | space = '\t'|'\n'|'\r'|' '|comment 403 | spaces = space* 404 | """ 405 | 406 | tree = ['And', 407 | ['rule', ['rule_name', 'name'], ['flags'], ['args'], 408 | ['and', 409 | ['or', ['apply', 'letter'], ['exactly', '_']], 410 | ['quantified', 411 | ['or', ['apply', 'letter'], ['apply', 'digit'], ['exactly', '_']], 412 | ['quantifier', '*']]]], 413 | ['rule', ['rule_name', 'expr'], ['flags'], ['args'], 414 | ['or', 415 | ['apply', 'apply'], ['apply', 'exactly'], ['apply', 'token'], 416 | ['apply', 'parenthesis'], ['apply', 'output']]], 417 | ['rule', ['rule_name', 'exactly'], ['flags', '!'], ['args'], 418 | ['and', 419 | ['token', "'"], 420 | ['output', 421 | ['quantified', 422 | ['or', ['apply', 'escaped_char'], 423 | ['and', ['negation', ['exactly', "'"]], ['apply', 'anything']]], 424 | ['quantifier', '*']]], 425 | ['token', "'"]]], 426 | ['rule', ['rule_name', 'token'], ['flags', '!'], ['args'], 427 | ['and', 428 | ['token', '"'], 429 | ['output', 430 | ['quantified', 431 | ['or', 432 | ['apply', 'escaped_char'], 433 | ['and', ['negation', ['exactly', '"']], ['apply', 'anything']]], 434 | ['quantifier', '*']]], 435 | ['token', '"']]], 436 | ['rule', ['rule_name', 'escaped_char'], ['flags', '!'], ['args'], ['and', 437 | ['exactly', '\\'], 438 | ['output', ['or'] + [['exactly', s] for s in 'nrtbf"\'\\']]]], 439 | ['rule', ['rule_name', 'apply'], ['flags', '!'], ['args'], 440 | ['and', ['quantified', ['or', ['exactly', '\t'], ['exactly', ' ']], 441 | ['quantifier', '*']], 442 | ['output', ['apply', 'name']]]], 443 | ['rule', ['rule_name', 'parenthesis'], ['flags'], ['args'], 444 | ['and', ['token', '('], ['output', ['apply', 'or']], ['token', ')']]], 445 | ['rule', ['rule_name', 'output'], ['flags', '!'], ['args'], 446 | ['and', ['token', '{'], ['output', ['apply', 'or']], ['token', '}']]], 447 | ['rule', ['rule_name', 'not'], ['flags'], ['args'], ['or', 448 | ['and', 449 | ['token', '~'], 450 | ['output', ['bound', ['apply', 'expr'], ['inline', 'negation']]]], 451 | ['apply', 'expr']]], 452 | ['rule', ['rule_name', 'quantified'], ['flags'], ['args'], 453 | ['and', 454 | ['apply', 'not'], 455 | ['quantified', 456 | ['bound', 457 | ['or', ['exactly', '*'], ['exactly', '+'], ['exactly', '?']], 458 | ['inline', 'quantifier']], 459 | ['quantifier', '?']]]], 460 | ['rule', ['rule_name', 'bound'], ['flags'], ['args'], 461 | ['and', 462 | ['apply', 'quantified'], 463 | ['quantified', 464 | ['and', 465 | ['exactly', '='], 466 | ['output', ['bound', ['apply', 'name'], ['inline', 'inline']]]], 467 | ['quantifier', '?']]]], 468 | ['rule', ['rule_name', 'and'], ['flags'], ['args'], 469 | ['quantified', ['apply', 'bound'], ['quantifier', '*']]], 470 | ['rule', ['rule_name', 'or'], ['flags'], ['args'], ['and', 471 | ['apply', 'and'], 472 | ['quantified', 473 | ['and', ['token', '|'], ['output', ['apply', 'and']]], 474 | ['quantifier', '*']]]], 475 | ['rule', ['rule_name', 'rule'], ['flags'], ['args'], 476 | ['and', 477 | ['apply', 'spaces'], 478 | ['output', 479 | ['and', 480 | ['bound', ['apply', 'name'], ['inline', 'rule_name']], 481 | ['bound', 482 | ['quantified', ['exactly', '!'], ['quantifier', '?']], 483 | ['inline', 'flags']], 484 | ['bound', ['apply', 'and'], ['inline', 'args']], 485 | ['and', ['token', '='], ['output', ['apply', 'or']]]]]]], 486 | ['rule', ['rule_name', 'grammar'], ['flags'], ['args'], 487 | ['and', 488 | ['output', ['quantified', ['apply', 'rule'], ['quantifier', '*']]], 489 | ['apply', 'spaces']]], 490 | ['rule', ['rule_name', 'letter'], ['flags'], ['args'], 491 | ['or'] + [['exactly', s] 492 | for s in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ']], 493 | ['rule', ['rule_name', 'digit'], ['flags'], ['args'], 494 | ['or'] + [['exactly', s] for s in '0123456789']], 495 | ['rule', ['rule_name', 'space'], ['flags'], ['args'], 496 | ['or'] + [['exactly', s] for s in '\t\n\r ']], 497 | ['rule', ['rule_name', 'spaces'], ['flags'], ['args'], 498 | ['quantified', ['apply', 'space'], ['quantifier', '*']]]] 499 | 500 | if __name__ == "__main__": 501 | import sys 502 | i1 = Interpreter(simple_wrap_tree(tree)) 503 | match_tree1 = i1.match(i1.rules['grammar'][-1], grammar + extra) 504 | i2 = Interpreter(match_tree1) 505 | match_tree2 = i2.match(i2.rules['grammar'][-1], python_grammar + extra) 506 | pyi = Interpreter(match_tree2, whitespace="\t \\") 507 | ast = pyi.match(pyi.rules['file_input'][-1], open(sys.argv[-1]).read()) 508 | ast.pprint() 509 | -------------------------------------------------------------------------------- /test/boot_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | sys.setrecursionlimit(5000) 4 | from pymetaterp.util import simple_wrap_tree 5 | from pymetaterp import boot_tree, boot_stackless as boot, boot_grammar 6 | 7 | grammar = boot_grammar.bootstrap + boot_grammar.extra 8 | i1 = boot.Interpreter(simple_wrap_tree(boot_tree.tree)) 9 | match_tree = i1.match(i1.rules['grammar'][-1], grammar) 10 | i2 = boot.Interpreter(match_tree) 11 | match_tree2 = i2.match(i2.rules['grammar'][-1], grammar) 12 | i3 = boot.Interpreter(match_tree2) 13 | for i in range(3): 14 | match_tree3 = i3.match(i3.rules['grammar'][-1], grammar) 15 | i3 = boot.Interpreter(match_tree3) 16 | grammar += boot_grammar.diff 17 | match_tree3 = i3.match(i3.rules['grammar'][-1], grammar) 18 | print match_tree == match_tree2 19 | -------------------------------------------------------------------------------- /test/compiled_python_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | sys.setrecursionlimit(5000) 4 | from pymetaterp.util import simple_wrap_tree 5 | from pymetaterp import boot_tree, boot_grammar 6 | from pymetaterp.boot_compiled import to_python, match 7 | from pymetaterp import python_compiled, python_grammar 8 | import os 9 | 10 | grammar = boot_grammar.bootstrap + boot_grammar.extra 11 | t1 = list(simple_wrap_tree(boot_tree.tree)) 12 | t2 = match(t1, grammar) 13 | t3 = match(t2, grammar + boot_grammar.diff) 14 | pytree = match(t3, python_grammar.full_definition + python_grammar.extra) 15 | srctree = python_compiled.match(pytree, open(os.path.join("test", "python_ex.py")).read()) 16 | srctree.pprint() 17 | -------------------------------------------------------------------------------- /test/compiled_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | sys.setrecursionlimit(5000) 4 | from pymetaterp.util import simple_wrap_tree 5 | from pymetaterp import boot_tree, boot_grammar 6 | from pymetaterp.boot_compiled import to_python, match 7 | 8 | t1 = list(simple_wrap_tree(boot_tree.tree)) 9 | grammar = boot_grammar.bootstrap + boot_grammar.extra 10 | t2 = match(t1, grammar) 11 | t3 = match(t2, grammar) 12 | assert(to_python(t2) == to_python(t3)) 13 | -------------------------------------------------------------------------------- /test/python_parse_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | sys.setrecursionlimit(5000) 4 | from pymetaterp.util import simple_wrap_tree 5 | from pymetaterp import boot_grammar, boot_tree, boot_stackless as boot_terp, python, python_grammar 6 | 7 | grammar = boot_grammar.bootstrap + boot_grammar.extra 8 | i1 = boot_terp.Interpreter(simple_wrap_tree(boot_tree.tree)) 9 | # Not needed, just double checking 10 | match_tree = i1.match(i1.rules['grammar'][-1], grammar) 11 | i2 = boot_terp.Interpreter(match_tree) 12 | match_tree2 = i2.match(i2.rules['grammar'][-1], grammar + boot_grammar.diff) 13 | i3 = boot_terp.Interpreter(match_tree2) 14 | match_tree3 = i3.match(i3.rules['grammar'][-1], python_grammar.full_definition + python_grammar.extra) 15 | pyi = python.Interpreter(match_tree3) 16 | pyimatch_tree = pyi.match(pyi.rules['grammar'][-1], open("test/python_parse_test.py").read()) 17 | pyimatch_tree.pprint() 18 | print len(pyi.input[0]) == pyi.input[1] + 1 19 | --------------------------------------------------------------------------------