├── patches
    ├── boot_memoizer.patch
    ├── boot_pos.patch
    ├── python_memoizer.patch
    └── python_pos.patch
├── pymetaterp
    ├── __init__.py
    ├── boot.py
    ├── boot_compiled.py
    ├── boot_grammar.py
    ├── boot_stackless.py
    ├── boot_tree.py
    ├── python.py
    ├── python_compiled.py
    ├── python_grammar.py
    └── util.py
├── readme.md
├── setup.py
├── single_file.py
└── test
    ├── boot_test.py
    ├── compiled_python_test.py
    ├── compiled_test.py
    └── python_parse_test.py


/patches/boot_memoizer.patch:
--------------------------------------------------------------------------------
 1 | diff --git pymetaterp/boot_stackless.py pymetaterp/boot_stackless.py
 2 | index 55ce028..1793b7e 100644
 3 | --- pymetaterp/boot_stackless.py
 4 | +++ pymetaterp/boot_stackless.py
 5 | @@ -57,6 +57,7 @@ class Interpreter:
 6 |          self.input = [input, pos]
 7 |          self.stack = [Frame(root, self.input)]
 8 |          output = self.new_step()
 9 | +        self.memoizer = {}
10 |          while True:
11 |              if output is Eval:
12 |                  root = self.stack[-1].calls[len(self.stack[-1].outputs)]
13 | @@ -83,6 +84,11 @@ class Interpreter:
14 |              # print " "*len(self.stack), "matching", name, root[NAME], self.input[1], self.input[0][self.input[1]+1:self.input[1]+11]
15 |              if root[NAME] == "anything":
16 |                  return pop(self.input)
17 | +            key = (root[NAME], id(self.input[0]), self.input[1])
18 | +            if key in self.memoizer:
19 | +                self.input = self.memoizer[key][1][:]
20 | +                return self.memoizer[key][0]
21 | +            self.stack[-1].key = key
22 |              calls.append(self.rules[root[NAME]][BODY])
23 |          elif name in ["exactly", "token"]:
24 |              if name == "token":
25 | @@ -145,9 +151,11 @@ class Interpreter:
26 |              make_node = "!" in self.rules[root[NAME]][FLAGS] or\
27 |                          (and_node and len(output) > 1)
28 |              #print len(self.stack)*" ", "returned", output
29 | -            if not make_node:
30 | -                return output
31 | -            return Node(root[NAME], to_list(output))
32 | +            if make_node:
33 | +                output = Node(root[NAME], to_list(output))
34 | +            self.memoizer[frame.key] = (output, self.input[:])
35 | +            return output
36 | +
37 |          elif name in "bound":
38 |              return Node(root[1][0], to_list(output))
39 |          elif name == "negation":
40 | 


--------------------------------------------------------------------------------
/patches/boot_pos.patch:
--------------------------------------------------------------------------------
 1 | diff --git pymetaterp/boot_stackless.py pymetaterp/boot_stackless.py
 2 | index 1793b7e..cb9470c 100644
 3 | --- pymetaterp/boot_stackless.py
 4 | +++ pymetaterp/boot_stackless.py
 5 | @@ -64,6 +64,8 @@ class Interpreter:
 6 |                  self.stack.append(Frame(root, self.input))
 7 |                  output = self.new_step()
 8 |              else:
 9 | +                if type(output) == Node:
10 | +                    output.pos = (self.stack[-1].input[1]+1, self.input[1]+1)
11 |                  self.stack.pop()
12 |                  if not self.stack:
13 |                      return output
14 | 


--------------------------------------------------------------------------------
/patches/python_memoizer.patch:
--------------------------------------------------------------------------------
 1 | --- python.py	2017-03-13 12:23:33.754710023 +0000
 2 | +++ python_memoized.py	2017-03-13 12:29:32.884700105 +0000
 3 | @@ -8,6 +8,7 @@
 4 |          self.indentation = [0]
 5 |          self.locals = {}
 6 |          self.debug = debug
 7 | +        self.memoizer = {}
 8 |          return boot.Interpreter.match(self, root, input, pos)
 9 |  
10 |      def eval(self, root):
11 | @@ -51,6 +52,12 @@
12 |              elif root[NAME] == "void":
13 |                  return
14 |              else:
15 | +                key = (root[NAME], id(self.input[0]), self.input[1],
16 | +                       tuple(self.indentation))
17 | +                if key in self.memoizer:
18 | +                    self.input = self.memoizer[key][1][:]
19 | +                    return self.memoizer[key][0]
20 | +                self.stack[-1].key = key
21 |                  calls.append(self.rules[root[NAME]][BODY])
22 |              self.stack[-1].locals = self.locals
23 |              self.locals = {}
24 | @@ -104,7 +111,9 @@
25 |          elif name == "apply":
26 |              # Need to run this line even on error
27 |              self.locals = frame.locals
28 | -            return boot.Interpreter.next_step(self)
29 | +            output = boot.Interpreter.next_step(self)
30 | +            self.memoizer[frame.key] = (output, self.input[:])
31 | +            return output
32 |          elif name == "lookahead":
33 |              self.input = frame.input[:]
34 |              return output
35 | 


--------------------------------------------------------------------------------
/patches/python_pos.patch:
--------------------------------------------------------------------------------
 1 | diff --git pymetaterp/python.py pymetaterp/python.py
 2 | index 29ac20d..0a9a558 100644
 3 | --- pymetaterp/python.py
 4 | +++ pymetaterp/python.py
 5 | @@ -124,12 +129,14 @@ class Interpreter(boot.Interpreter):
 6 |  def reformat_atom(atom, trailers):
 7 |      output = atom
 8 |      for trailer in to_list(trailers):
 9 | +        pos = (output.pos[0], trailer.pos[1])
10 |          if trailer.name == "arglist":
11 | -            output = Node("__call__", [output, trailer])
12 | +            output = Node("__call__", [output, trailer], pos=pos)
13 |          elif trailer.name == "NAME":
14 | -            output = Node("__getattr__", [output, Node("NAME", trailer)])
15 | +            output = Node("__getattr__", [output, Node("NAME", trailer,
16 | +                                                       pos=trailer.pos)], pos=pos)
17 |          elif trailer.name == "subscriptlist":
18 | -            output = Node("__getitem__", [output] + trailer)
19 | +            output = Node("__getitem__", [output] + trailer, pos=pos)
20 |          else:
21 |              raise Exception("Unknown trailer %s" % trailer.name)
22 |      return output
23 | @@ -154,7 +161,7 @@ def reformat_binary(start, oper_and_atoms):
24 |              while index < len(tokens) and\
25 |                    priority[tokens[index][0][0]] > priority[op]:
26 |                  rhs, index = parse(rhs, tokens, index)
27 | -            lhs = Node("__binary__", [op, lhs, rhs])
28 | +            lhs = Node("__binary__", [op, lhs, rhs], pos=(lhs.pos[0], rhs.pos[1]))
29 |          return (lhs, index)
30 |      if not oper_and_atoms:
31 |          return start
32 | 


--------------------------------------------------------------------------------
/pymetaterp/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asrp/pymetaterp/ed235fa31dfc2e2d8febe324fe6c4cc17bbaa8fb/pymetaterp/__init__.py


--------------------------------------------------------------------------------
/pymetaterp/boot.py:
--------------------------------------------------------------------------------
  1 | from util import MatchError, Node
  2 | 
# Positions of the four children of a rule node: rule[NAME][0] is the rule's
# name, rule[FLAGS] its flags (e.g. "!"), rule[BODY] the expression to match.
NAME, FLAGS, ARGS, BODY = [0, 1, 2, 3]
# Upper repetition bound for the unbounded quantifiers "*" and "+".
inf = float("inf")
# input is a pair (container, pos)
# pos is the index of the last element consumed; it starts at -1.
  6 | 
  7 | def pop(input):
  8 |     input[1] += 1
  9 |     try:
 10 |         return input[0][input[1]]
 11 |     except IndexError:
 12 |         raise MatchError("EOF")
 13 | 
 14 | def to_list(output):
 15 |     return output  if getattr(output, "name", None) == "And" else\
 16 |            []      if output is None else\
 17 |            [output]
 18 | 
 19 | class Interpreter:
 20 |     def __init__(self, grammar_tree):
 21 |         self.rules = {rule[NAME][0]:rule for rule in grammar_tree}
 22 |         self.join_str = True
 23 | 
 24 |     def match(self, root, new_input=None, new_pos=-1):
 25 |         """ >>> g.match(g.rules['grammar'][-1], "x='y'") """
 26 |         if new_input is not None:
 27 |             self.input = [new_input, new_pos]
 28 |         old_input = self.input[:]
 29 |         name = root.name
 30 |         #print("matching %s" % name)
 31 |         if name in ["and", "args", "body", "output"]:
 32 |             outputs = [self.match(child) for child in root]
 33 |             if any(child.name == "output" for child in root):
 34 |                 outputs = [output for child, output in zip(root, outputs)
 35 |                            if child.name == "output"]
 36 |         elif name == "quantified":
 37 |             assert(root[1].name == "quantifier")
 38 |             lower, upper = {"*": (0, inf), "+": (1, inf), "?": (0, 1)}[root[1][0]]
 39 |             outputs = []
 40 |             while len(outputs) < upper:
 41 |                 last_input = self.input[:]
 42 |                 try:
 43 |                     outputs.append(self.match(root[0]))
 44 |                 except MatchError:
 45 |                     self.input = last_input[:]
 46 |                     break
 47 |                 if last_input == self.input:
 48 |                     break
 49 |             if lower > len(outputs):
 50 |                 raise MatchError("Matched %s < %s times" % (len(outputs), lower))
 51 |         elif name == "or":
 52 |             for child in root:
 53 |                 try:
 54 |                     return self.match(child)
 55 |                 except MatchError:
 56 |                     self.input = old_input[:]
 57 |             raise MatchError("All Or matches failed")
 58 |         elif name in ["exactly", "token"]:
 59 |             if name == "token":
 60 |                 while pop(self.input) in ['\t', '\n', '\r', ' ']:
 61 |                     pass
 62 |                 self.input[1] -= 1
 63 |             if pop(self.input) == root[0]:
 64 |                 return root[0]
 65 |             else:
 66 |                 raise MatchError("Not exactly %s" % root)
 67 |         elif name == "apply":
 68 |             #print "rule %s" % root[NAME]
 69 |             if root[NAME] == "anything":
 70 |                 return pop(self.input)
 71 |             outputs = self.match(self.rules[root[NAME]][BODY])
 72 |             if root[NAME] == "escaped_char":
 73 |                 chars = dict(["''", '""', "t\t", "n\n", "r\r",
 74 |                               "b\b", "f\f", "\\\\"])
 75 |                 return chars[outputs]
 76 |             and_node = getattr(outputs, "name", None) == "And"
 77 |             make_node = "!" in self.rules[root[NAME]][FLAGS] or\
 78 |                         (and_node and len(outputs) > 1)
 79 |             if not make_node:
 80 |                 return outputs
 81 |             return Node(root[NAME], to_list(outputs))
 82 |         elif name in "bound":
 83 |             return Node(root[1][0], to_list(self.match(root[0])))
 84 |         elif name == "negation":
 85 |             try:
 86 |                 self.match(root[0])
 87 |             except MatchError:
 88 |                 self.input = old_input
 89 |                 return None
 90 |             raise MatchError("Negation true")
 91 |         else:
 92 |             raise Exception("Unknown operator %s" % name)
 93 | 
 94 |         outputs = [elem for output in outputs
 95 |                    for elem in to_list(output)]
 96 |         if len(outputs) == 1:
 97 |             return outputs[0]
 98 |         elif len(outputs) == 0:
 99 |             return None
100 |         else:
101 |             if self.join_str and all(type(output) == str for output in outputs):
102 |                 return "".join(outputs)
103 |             return Node("And", outputs)
104 | 


--------------------------------------------------------------------------------
/pymetaterp/boot_compiled.py:
--------------------------------------------------------------------------------
  1 | from util import MatchError
  2 | from pdb import set_trace as bp
  3 | 
# Upper repetition bound for the unbounded quantifiers "*" and "+".
inf = float("inf")
  5 | 
  6 | class Glob(object):
  7 |     pass
  8 | 
  9 | g = Glob()
 10 | 
 11 | class Source():
 12 |     def __init__(self, source):
 13 |         self.source = source
 14 |         self.position = -1
 15 |     def next(self):
 16 |         self.position += 1
 17 |         try:
 18 |             return self.source[self.position]
 19 |         except IndexError:
 20 |             return MatchError("EOF")
 21 | 
 22 | class Node(object):
 23 |     def __init__(self, name, children, pos=(None, None)):
 24 |         self.name = name
 25 |         self.children = children
 26 |         self.pos = pos
 27 |     def __getitem__(self, index):
 28 |         if type(self.children) == list:
 29 |             return self.children[index]
 30 |         else:
 31 |             return [self.children][index]
 32 |     def __len__(self):
 33 |         return len(self.children) if isinstance(self.children, list) else 1 if self.children else 0
 34 |     def __repr__(self):
 35 |         return "%s(%s)" % (self.name, self.children)
 36 |     def pprint(self, indent=0):
 37 |         print " "*indent + self.name
 38 |         children = [self.children] if not isinstance(self.children, list) else self.children
 39 |         for child in children:
 40 |             if not hasattr(child, "pprint"):
 41 |                 print " "*(indent + 1), type(child).__name__, repr(child)
 42 |             else:
 43 |                 child.pprint(indent + 2)
 44 | 
 45 | def to_list(value):
 46 |     return value if isinstance(value, list) else\
 47 |            [] if value is None else\
 48 |            [value]
 49 | 
 50 | def exactly(char):
 51 |     ichar = g.input.next()
 52 |     return ichar if isinstance(ichar, MatchError) or char == ichar\
 53 |         else MatchError("Not exactly %s" % char)
 54 | 
 55 | def between(start, end):
 56 |     ichar = g.input.next()
 57 |     return ichar if isinstance(ichar, MatchError) or start <= ichar <= end\
 58 |         else MatchError("Not between %s and %s" % (start, end))
 59 | 
 60 | def token(s):
 61 |     while g.input.next() in ['\t', '\n', '\r', ' ']:
 62 |         pass
 63 |     g.input.position -= 1
 64 |     for char in s:
 65 |         if g.input.next() != char:
 66 |             return MatchError("Not exactly %s" % char)
 67 |     return s
 68 | 
 69 | def or_(children):
 70 |     saved = g.input.position
 71 |     for child in children:
 72 |         g.input.position = saved
 73 |         output = child()
 74 |         if not isinstance(output, MatchError):
 75 |             return output
 76 |     g.input.position = saved
 77 |     return MatchError("No OR child matches")
 78 | 
 79 | def and_(children):
 80 |     saved = g.input.position
 81 |     outputs = []
 82 |     output_mode = False
 83 |     for child in children:
 84 |         output = child()
 85 |         if isinstance(output, MatchError):
 86 |             g.input.position = saved
 87 |             return MatchError("And match failed")
 88 |         if output_mode:
 89 |             if getattr(output, "name", None) == "out":
 90 |                 outputs.extend(to_list(output.children))
 91 |         else:
 92 |             if getattr(output, "name", None) == "out":
 93 |                 outputs = output.children
 94 |                 output_mode = True
 95 |             else:
 96 |                 outputs.extend(to_list(output))
 97 |     return "".join(outputs) if outputs and type(outputs) == list and all(type(output) == str for output in outputs) and len(outputs[0]) == 1\
 98 |         else outputs
 99 | 
def out(child=lambda: None):
    """Run ``child`` and wrap its output in an "out" node.

    A MatchError from the child is returned unchanged.  With the default
    child the result is an "out" node whose children is None.
    """
    result = child()
    if isinstance(result, MatchError):
        return result
    return Node("out", result)
103 | 
def quantified(child, quantifier_node):
    """Match ``child`` repeatedly as allowed by a "*", "+" or "?" quantifier.

    child           -- zero-argument callable that performs one match.
    quantifier_node -- pair whose second item is the quantifier character.
                       It is unpacked in the body because tuple parameters
                       in the signature are Python 2-only syntax.

    Returns the flattened list of outputs.  Returns a MatchError, with the
    input position restored to where the repetition started, when fewer than
    the required number of matches succeeded.
    """
    _, quantifier = quantifier_node
    lower, upper = {"*": (0, inf), "+": (1, inf), "?": (0, 1)}[quantifier]
    outputs = []
    count = 0
    start_saved = g.input.position
    while count < upper:
        saved = g.input.position
        output = child()
        if isinstance(output, MatchError):
            if count < lower:
                g.input.position = start_saved
                return MatchError("Quantified undermatch %s < %s" % (count, lower))
            # Undo only the failed attempt; earlier matches stay consumed.
            g.input.position = saved
            return outputs
        outputs.extend(to_list(output))
        count += 1
        # A child that succeeds without consuming input would repeat forever
        # under "*" or "+"; stop after recording that match.
        if g.input.position == saved:
            break
    return outputs
122 | 
def negation(child):
    """Succeed (returning None) only when ``child`` fails to match.

    The input position is restored in both cases, so no input is consumed.
    Returns a MatchError when the child matched.
    """
    start = g.input.position
    result = child()
    g.input.position = start
    if isinstance(result, MatchError):
        return None
    return MatchError("Negation_is_true")
128 | 
def bound(child, name_node):
    """Run ``child`` and wrap its output in a node with the given name.

    child     -- zero-argument callable that performs the match.
    name_node -- pair whose second item is the name for the new node.  It is
                 unpacked in the body because tuple parameters in the
                 signature are Python 2-only syntax.

    Returns the child's MatchError unchanged on failure; otherwise a Node
    whose ``pos`` is (start, end) with both input positions shifted by one.
    """
    _, name = name_node
    saved = g.input.position
    output = child()
    if isinstance(output, MatchError):
        return output
    return Node(name, output, (saved+1, g.input.position+1))
134 | 
def apply_(name):
    """Apply the rule called ``name`` and post-process its output.

    ``g.rules[name]`` is a ``(function, flags)`` pair.  Returns the rule's
    MatchError unchanged on failure.  On success:

    * "escaped_char" maps the escape letter to the character it denotes;
    * "balanced" returns the raw output;
    * a rule flagged "!" or yielding a list of more than one item is wrapped
      in a Node carrying the matched span in ``pos``;
    * anything else is returned as-is.
    """
    saved = g.input.position
    # func, flagged
    output = g.rules[name][0]()
    if isinstance(output, MatchError):
        return output
    if name == "escaped_char":
        # Same translations as the other interpreters ("b" and "f" were
        # missing here).  Quotes and the backslash stand for themselves and
        # fall through to the default.
        escapes = {"t": "\t", "n": "\n", "r": "\r", "b": "\b", "f": "\f"}
        return escapes.get(output, output)
    if name == "balanced":
        return output
    if "!" in g.rules[name][1] or (isinstance(output, list) and len(output) > 1):
        return Node(name, output, (saved+1, g.input.position+1))
    return output
148 | 
def rule_anything():
    """Built-in rule: consume and return the next input element.

    Whatever ``g.input.next()`` yields is returned, except that a ``None``
    element is turned into a MatchError.
    """
    char = g.input.next()
    if char is None:
        return MatchError("End_of_file")
    return char
152 | 
def rule_letter():
    """Built-in rule: match one element in "a".."z" or "A".."Z"."""
    lowercase = lambda: between("a", "z")
    uppercase = lambda: between("A", "Z")
    return or_([lowercase, uppercase])
155 | 
def rule_digit():
    """Built-in rule: match one element in "0".."9"."""
    matched = between("0", "9")
    return matched
158 | 
def closure(child, value):
    """Render ``value`` as an argument in the generated source.

    String children and "quantifier"/"inline"/"bind" nodes are emitted
    directly via ``str``; every other child is deferred by wrapping its
    code in a ``lambda``.
    """
    emit_directly = isinstance(child, str) or\
        child.name in ["quantifier", "inline", "bind"]
    if emit_directly:
        return str(value)
    return "lambda: %s" % value
161 | 
def to_python(root):
    """Translate a grammar tree into Python source text.

    * a string becomes its ``repr``;
    * a plain list of rules becomes the rule function definitions (the
      "letter" and "digit" rules are not emitted) followed by a
      ``g.rules.update`` statement that registers every rule with its flags;
    * "quantifier"/"inline"/"bind" nodes become a ``(name, text)`` tuple;
    * a "rule" node becomes a ``def rule_<name>()`` definition;
    * any other node becomes a call to the function named after it.
    """
    if isinstance(root, str):
        return repr(root)
    if type(root) == list:
        #names = [rule[0][0] for rule in root] + ["letter", "digit", "anything"]
        entries = []
        for rule in root:
            rule_name = rule[0][0]
            flags = repr("".join(rule[1]))
            entries.append('"%s": (rule_%s, %s)' % (rule_name, rule_name, flags))
        definitions = []
        for rule in root:
            if rule[0][0] not in ["letter", "digit"]:
                definitions.append(to_python(rule))
        registration = "\n\ng.rules.update({%s})" % ", ".join(entries)
        return "\n\n".join(definitions) + registration

    name = root.name
    if name in ["and", "or", "apply"]:
        # These names need a trailing underscore: `and`/`or` are keywords
        # and the matching helper for "apply" is called apply_.
        name += "_"
    if name in ["quantifier", "inline", "bind"]:
        # [1:-1] strips the quotes that repr put around the string.
        return (name, to_python(root[0])[1:-1])
    if name == "rule":
        return "def rule_%s():\n    return %s" % (root[0][0], to_python(root[-1]))

    rendered = [closure(child, to_python(child)) for child in root]
    children = ", ".join(rendered)
    if name in ["and_", "or_"]:
        children = "[%s]" % children
    if name == "output":
        name = "out"
    return "%s(%s)" % (name, children)
185 | 
def gen_from_tree():
    """Return the Python source generated from the tree in ``boot_tree``.

    The tree is passed through ``util.simple_wrap_tree`` and turned into a
    list before being handed to ``to_python``.
    """
    import boot_tree
    from util import simple_wrap_tree
    wrapped = simple_wrap_tree(boot_tree.tree)
    rules = list(wrapped)
    return to_python(rules)
190 | 
def match(tree, inp):
    """Compile ``tree`` to Python, run it, and parse ``inp`` with the result.

    tree -- grammar tree accepted by ``to_python``; the generated code must
            define ``rule_grammar``.
    inp  -- the input to parse.

    Resets ``g.rules`` to the three built-in rules before the generated code
    registers its own, points ``g.input`` at ``inp`` and returns whatever
    ``rule_grammar()`` returns.

    NOTE: this executes generated source; only use it with trusted grammars.
    """
    g.rules = {'anything': (rule_anything, ''), 'letter': (rule_letter, ''),
               'digit': (rule_digit, '')}
    # Run the generated definitions in an explicit copy of the module
    # namespace.  A bare exec inside a function is Python 2-only syntax, and
    # under Python 3 names it defines are not visible to the code after it.
    namespace = dict(globals())
    exec(to_python(tree), namespace)
    g.input = Source(inp)
    return namespace["rule_grammar"]()
197 | 
# Self-check: build the parser from the stored tree, parse the bootstrap
# grammar with it, rebuild the parser from that parse, and verify that a second
# parse generates identical source.
if __name__ == "__main__":
    from boot_grammar import bootstrap
    # exec(...) and print(...) with one parenthesised argument behave the same
    # under Python 2 and Python 3.
    exec(gen_from_tree())
    #g.input = Source("foo = bar")
    g.input = Source(bootstrap)
    output = rule_grammar()
    print(to_python(output))
    exec(to_python(output))
    g.input = Source(bootstrap)
    output2 = rule_grammar()
    assert(to_python(output) == to_python(output2))
209 | 


--------------------------------------------------------------------------------
/pymetaterp/boot_grammar.py:
--------------------------------------------------------------------------------
# Grammar text describing the grammar notation itself: expression forms
# (apply, exactly, token, parenthesis, output), the operator layers
# (not, quantified, bound, and, or) and the top-level `rule` / `grammar` rules.
# It is a raw string, so the backslashes reach the grammar parser unchanged.
# A "!" after a rule name is captured as that rule's flags.
bootstrap = r"""
name = (letter | '_') (letter | digit | '_')*
expr = apply | exactly | token | parenthesis | output

exactly! = "'" {(escaped_char | ~'\'' anything)*} "'"
token! = "\"" {(escaped_char | ~'"' anything)*} "\""
escaped_char! = '\\' {'n'|'r'|'t'|'b'|'f'|'"'|'\''|'\\'}
apply! = ('\t'|' ')* {name}
parenthesis = "(" {or} ")"
output! = "{" {or} "}"

not = "~" {expr=negation} | expr
quantified = not (('*' | '+' | '?')=quantifier)?
bound = quantified ('=' {name=inline})?
and = bound*
or = and ("|" {and})*

rule = spaces {name=rule_name '!'?=flags and=args ("=" {or})}
grammar = {rule*} spaces
"""
21 | 
# Further grammar rules in the same notation.  Several names (expr, apply,
# not, bound) are also defined in `bootstrap`, and new forms are introduced:
# comments, indentation, lists, predicates, actions, rule values, lookahead
# and binding.
# NOTE(review): presumably these are layered over `bootstrap` as overrides --
# confirm against the code that consumes `diff`.
diff = r"""
comment = '#' (~'\n' anything)*
hspace = ' ' | '\t' | comment
indentation = (hspace* ('\r' '\n' | '\r' | '\n'))* hspace+
space = '\n' | '\r' | hspace

expr = apply | exactly | token | parenthesis | output | list
     | rule_value | predicate | action

list! = "[" {or} "]"
predicate! = "?(" {balanced} ')'
action! = "!(" {balanced} ')'
rule_value! = "->" hspace* {(escaped_char | ~'\n' anything)*}
apply! = indentation? {name ('(' {balanced=args} ')')?}
not = "~" "~" {expr=lookahead} | "~" {expr=negation} | expr
bound = ":" {name=bind}
      | quantified (':' {name=bind} | '=' {name=inline})?

balanced = (escaped_char | '(' balanced ')' | ~')' anything)*
"""
42 | 
# Character-class rules spelled out as explicit alternatives: letter, digit,
# space and spaces.
# Unlike the two strings above this is NOT a raw string, so Python interprets
# the \t, \n and \r escapes and the grammar text contains the actual
# tab/newline/carriage-return characters between the quotes.
extra = """
letter = 'a'|'b'|'c'|'d'|'e'|'f'|'g'|'h'|'i'|'j'|'k'|'l'|'m'|'n'|'o'|'p'|'q'|'r'|'s'|'t'|'u'|'v'|'w'|'x'|'y'|'z'|'A'|'B'|'C'|'D'|'E'|'F'|'G'|'H'|'I'|'J'|'K'|'L'|'M'|'N'|'O'|'P'|'Q'|'R'|'S'|'T'|'U'|'V'|'W'|'X'|'Y'|'Z'
digit = '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
space = '\t'|'\n'|'\r'|' '
spaces = space*
"""
49 | 


--------------------------------------------------------------------------------
/pymetaterp/boot_stackless.py:
--------------------------------------------------------------------------------
  1 | from util import MatchError, Node
  2 | 
# Positions of the four children of a rule node: rule[NAME][0] is the rule's
# name, rule[FLAGS] its flags (e.g. "!"), rule[BODY] the expression to match.
NAME, FLAGS, ARGS, BODY = [0, 1, 2, 3]
# Upper repetition bound for the unbounded quantifiers "*" and "+".
inf = float("inf")
# input is a pair (container, pos)
# pos is the index of the last element consumed; it starts at -1.
  6 | 
  7 | class Eval:
  8 |     pass
  9 | 
 10 | class Frame:
 11 |     def __init__(self, root, input):
 12 |         self.root = root
 13 |         self.calls = []
 14 |         self.input = input[:]
 15 |         self.outputs = []
 16 | 
 17 |     def __repr__(self):
 18 |         return repr(self.calls)
 19 | 
 20 | def pop(input):
 21 |     input[1] += 1
 22 |     try:
 23 |         return input[0][input[1]]
 24 |     except IndexError:
 25 |         return MatchError("EOF")
 26 | 
 27 | def to_list(output):
 28 |     return output  if getattr(output, "name", None) == "And" else\
 29 |            []      if output is None else\
 30 |            [output]
 31 | 
 32 | def to_node(outputs, join_str):
 33 |     outputs = [elem for output in outputs
 34 |                for elem in to_list(output)]
 35 |     if len(outputs) == 1:
 36 |         return outputs[0]
 37 |     elif len(outputs) == 0:
 38 |         return None
 39 |     else:
 40 |         if join_str and all(type(output) == str for output in outputs):
 41 |             return "".join(outputs)
 42 |         return Node("And", outputs)
 43 | 
 44 | class Interpreter:
 45 |     def __init__(self, grammar_tree):
 46 |         self.rules = {rule[NAME][0]:rule for rule in grammar_tree}
 47 |         self.join_str = True
 48 | 
 49 |     def dbg(self):
 50 |         print len(self.input[0]), self.input[1]
 51 |         if len(self.input[0]) == self.input[1] + 1:
 52 |             return
 53 |         print self.input[0][self.input[1]: self.input[1] + 200]
 54 | 
 55 |     def parse(self, rule_name, input, **kwargs):
 56 |         output = self.match(self.rules[rule_name][-1], input, **kwargs)
 57 |         if type(output) == MatchError or len(self.input[0]) == self.input[1] + 1:
 58 |             return output
 59 |         return MatchError("Not all input read")
 60 | 
 61 |     def match(self, root, input=None, pos=-1):
 62 |         """ >>> g.match(g.rules['grammar'][-1], "x='y'") """
 63 |         self.input = [input, pos]
 64 |         self.stack = [Frame(root, self.input)]
 65 |         output = self.new_step()
 66 |         self.memoizer = {}
 67 |         while True:
 68 |             if output is Eval:
 69 |                 root = self.stack[-1].calls[len(self.stack[-1].outputs)]
 70 |                 self.stack.append(Frame(root, self.input))
 71 |                 output = self.new_step()
 72 |             else:
 73 |                 if type(output) == Node:
 74 |                     output.pos = (self.stack[-1].input[1]+1, self.input[1]+1)
 75 |                 self.stack.pop()
 76 |                 if not self.stack:
 77 |                     return output
 78 |                 #print len(self.stack)*" ", "returned", output
 79 |                 self.stack[-1].outputs.append(output)
 80 |                 output = self.next_step()
 81 | 
 82 |     def new_step(self):
 83 |         root = self.stack[-1].root
 84 |         name = root.name
 85 |         calls = self.stack[-1].calls
 86 |         #print len(self.stack)*" ", "matching", name
 87 |         if name in ["and", "args", "output", "or"]:
 88 |             calls.extend(root)
 89 |         elif name in ["bound", "negation", "quantified"]:
 90 |             calls.append(root[0])
 91 |         elif name == "apply":
 92 |             # print " "*len(self.stack), "matching", name, root[NAME], self.input[1], self.input[0][self.input[1]+1:self.input[1]+11]
 93 |             if root[NAME] == "anything":
 94 |                 return pop(self.input)
 95 |             key = (root[NAME], id(self.input[0]), self.input[1])
 96 |             if key in self.memoizer:
 97 |                 self.input = self.memoizer[key][1][:]
 98 |                 return self.memoizer[key][0]
 99 |             self.stack[-1].key = key
100 |             calls.append(self.rules[root[NAME]][BODY])
101 |         elif name in ["exactly", "token"]:
102 |             if name == "token":
103 |                 while pop(self.input) in ['\t', '\n', '\r', ' ']:
104 |                     pass
105 |                 if self.input[1] == len(self.input[0]):
106 |                     return MatchError("EOF")
107 |                 self.input[1] -= 1
108 |             for char in root[0]:
109 |                 if pop(self.input) != char:
110 |                     return MatchError("Not exactly %s" % root[0])
111 |             return root[0]
112 |         return Eval
113 | 
114 |     def next_step(self):
115 |         frame = self.stack[-1]
116 |         root = frame.root
117 |         name = root.name
118 |         outputs = frame.outputs
119 |         output = outputs[-1] if outputs else None
120 |         is_error = type(output) == MatchError
121 |         finished = len(outputs) == len(frame.calls)
122 |         if is_error and name not in ["quantified", "or", "negation"]:
123 |             return output
124 |         elif not (finished or name in ["or", "quantified"]):
125 |             return Eval
126 |         if name in ["and", "args", "output"]:
127 |             if any(child.name == "output" for child in root):
128 |                 outputs = [output for child, output in zip(root, outputs)
129 |                            if child.name == "output"]
130 |             return to_node(outputs, self.join_str)
131 |         elif name == "quantified":
132 |             assert(root[1].name == "quantifier")
133 |             lower, upper = {"*": (0, inf), "+": (1, inf), "?": (0, 1)}[root[1][0]]
134 |             if is_error:
135 |                 self.input = frame.input[:]
136 |                 outputs.pop()
137 |             #print("output len", len(outputs))
138 |             if is_error or len(outputs) == upper or frame.input == self.input:
139 |                 if lower > len(outputs):
140 |                     return MatchError("Matched %s < %s times" % (len(outputs), lower))
141 |                 else:
142 |                     return to_node(outputs, self.join_str)
143 |             else:
144 |                 frame.input = self.input[:]
145 |                 self.stack[-1].calls.append(root[0])
146 |         elif name == "or":
147 |             if is_error:
148 |                 self.input = frame.input[:]
149 |                 if finished:
150 |                     return MatchError("All Or matches failed")
151 |             else:
152 |                 return output
153 |         elif name == "apply":
154 |             if root[NAME] == "escaped_char" and not is_error:
155 |                 chars = dict(["''", '""', "t\t", "n\n", "r\r",
156 |                               "b\b", "f\f", "\\\\"])
157 |                 return chars[output]
158 |             and_node = getattr(output, "name", None) == "And"
159 |             make_node = "!" in self.rules[root[NAME]][FLAGS] or\
160 |                         (and_node and len(output) > 1)
161 |             #print len(self.stack)*" ", "returned", output
162 |             if make_node:
163 |                 output = Node(root[NAME], to_list(output))
164 |             self.memoizer[frame.key] = (output, self.input[:])
165 |             return output
166 | 
167 |         elif name in "bound":
168 |             return Node(root[1][0], to_list(output))
169 |         elif name == "negation":
170 |             if is_error:
171 |                 self.input = frame.input
172 |                 return None
173 |             else:
174 |                 return MatchError("Negation true")
175 |         else:
176 |             raise Exception("Unknown operator %s" % name)
177 |         return Eval
178 | 


--------------------------------------------------------------------------------
/pymetaterp/boot_tree.py:
--------------------------------------------------------------------------------
# Parse tree of the boot grammar, stored as plain nested lists.
# Every list has the form [node_name, child, ...]; leaves are strings.
# The top-level 'And' node holds one 'rule' entry per grammar rule, and each
# 'rule' entry consists of 'rule_name', 'flags' ('!' when flagged), 'args'
# and finally the rule body.
# NOTE(review): the layout matches what Node.save() in util.py writes
# ("tree = " followed by a pprint dump) -- presumably generated; confirm
# before editing by hand.
tree = ['And',
 ['rule',
  ['rule_name', 'name'],
  ['flags'],
  ['args'],
  ['and',
   ['or', ['apply', 'letter'], ['exactly', '_']],
   ['quantified',
    ['or', ['apply', 'letter'], ['apply', 'digit'], ['exactly', '_']],
    ['quantifier', '*']]]],
 ['rule',
  ['rule_name', 'expr'],
  ['flags'],
  ['args'],
  ['or',
   ['apply', 'apply'],
   ['apply', 'exactly'],
   ['apply', 'token'],
   ['apply', 'parenthesis'],
   ['apply', 'output']]],
 ['rule',
  ['rule_name', 'exactly'],
  ['flags', '!'],
  ['args'],
  ['and',
   ['token', "'"],
   ['output',
    ['quantified',
     ['or',
      ['apply', 'escaped_char'],
      ['and', ['negation', ['exactly', "'"]], ['apply', 'anything']]],
     ['quantifier', '*']]],
   ['token', "'"]]],
 ['rule',
  ['rule_name', 'token'],
  ['flags', '!'],
  ['args'],
  ['and',
   ['token', '"'],
   ['output',
    ['quantified',
     ['or',
      ['apply', 'escaped_char'],
      ['and', ['negation', ['exactly', '"']], ['apply', 'anything']]],
     ['quantifier', '*']]],
   ['token', '"']]],
 ['rule',
  ['rule_name', 'escaped_char'],
  ['flags', '!'],
  ['args'],
  ['and',
   ['exactly', '\\'],
   ['output',
    ['or',
     ['exactly', 'n'],
     ['exactly', 'r'],
     ['exactly', 't'],
     ['exactly', 'b'],
     ['exactly', 'f'],
     ['exactly', '"'],
     ['exactly', "'"],
     ['exactly', '\\']]]]],
 ['rule',
  ['rule_name', 'apply'],
  ['flags', '!'],
  ['args'],
  ['and',
   ['quantified',
    ['or', ['exactly', '\t'], ['exactly', ' ']],
    ['quantifier', '*']],
   ['output', ['apply', 'name']]]],
 ['rule',
  ['rule_name', 'parenthesis'],
  ['flags'],
  ['args'],
  ['and', ['token', '('], ['output', ['apply', 'or']], ['token', ')']]],
 ['rule',
  ['rule_name', 'output'],
  ['flags', '!'],
  ['args'],
  ['and', ['token', '{'], ['output', ['apply', 'or']], ['token', '}']]],
 ['rule',
  ['rule_name', 'not'],
  ['flags'],
  ['args'],
  ['or',
   ['and',
    ['token', '~'],
    ['output', ['bound', ['apply', 'expr'], ['inline', 'negation']]]],
   ['apply', 'expr']]],
 ['rule',
  ['rule_name', 'quantified'],
  ['flags'],
  ['args'],
  ['and',
   ['apply', 'not'],
   ['quantified',
    ['bound',
     ['or', ['exactly', '*'], ['exactly', '+'], ['exactly', '?']],
     ['inline', 'quantifier']],
    ['quantifier', '?']]]],
 ['rule',
  ['rule_name', 'bound'],
  ['flags'],
  ['args'],
  ['and',
   ['apply', 'quantified'],
   ['quantified',
    ['and',
     ['exactly', '='],
     ['output', ['bound', ['apply', 'name'], ['inline', 'inline']]]],
    ['quantifier', '?']]]],
 ['rule',
  ['rule_name', 'and'],
  ['flags'],
  ['args'],
  ['quantified', ['apply', 'bound'], ['quantifier', '*']]],
 ['rule',
  ['rule_name', 'or'],
  ['flags'],
  ['args'],
  ['and',
   ['apply', 'and'],
   ['quantified',
    ['and', ['token', '|'], ['output', ['apply', 'and']]],
    ['quantifier', '*']]]],
 ['rule',
  ['rule_name', 'rule'],
  ['flags'],
  ['args'],
  ['and',
   ['apply', 'spaces'],
   ['output',
    ['and',
     ['bound', ['apply', 'name'], ['inline', 'rule_name']],
     ['bound',
      ['quantified', ['exactly', '!'], ['quantifier', '?']],
      ['inline', 'flags']],
     ['bound', ['apply', 'and'], ['inline', 'args']],
     ['and', ['token', '='], ['output', ['apply', 'or']]]]]]],
 ['rule',
  ['rule_name', 'grammar'],
  ['flags'],
  ['args'],
  ['and',
   ['output', ['quantified', ['apply', 'rule'], ['quantifier', '*']]],
   ['apply', 'spaces']]],
 ['rule',
  ['rule_name', 'letter'],
  ['flags'],
  ['args'],
  ['or',
   ['exactly', 'a'],
   ['exactly', 'b'],
   ['exactly', 'c'],
   ['exactly', 'd'],
   ['exactly', 'e'],
   ['exactly', 'f'],
   ['exactly', 'g'],
   ['exactly', 'h'],
   ['exactly', 'i'],
   ['exactly', 'j'],
   ['exactly', 'k'],
   ['exactly', 'l'],
   ['exactly', 'm'],
   ['exactly', 'n'],
   ['exactly', 'o'],
   ['exactly', 'p'],
   ['exactly', 'q'],
   ['exactly', 'r'],
   ['exactly', 's'],
   ['exactly', 't'],
   ['exactly', 'u'],
   ['exactly', 'v'],
   ['exactly', 'w'],
   ['exactly', 'x'],
   ['exactly', 'y'],
   ['exactly', 'z'],
   ['exactly', 'A'],
   ['exactly', 'B'],
   ['exactly', 'C'],
   ['exactly', 'D'],
   ['exactly', 'E'],
   ['exactly', 'F'],
   ['exactly', 'G'],
   ['exactly', 'H'],
   ['exactly', 'I'],
   ['exactly', 'J'],
   ['exactly', 'K'],
   ['exactly', 'L'],
   ['exactly', 'M'],
   ['exactly', 'N'],
   ['exactly', 'O'],
   ['exactly', 'P'],
   ['exactly', 'Q'],
   ['exactly', 'R'],
   ['exactly', 'S'],
   ['exactly', 'T'],
   ['exactly', 'U'],
   ['exactly', 'V'],
   ['exactly', 'W'],
   ['exactly', 'X'],
   ['exactly', 'Y'],
   ['exactly', 'Z']]],
 ['rule',
  ['rule_name', 'digit'],
  ['flags'],
  ['args'],
  ['or',
   ['exactly', '0'],
   ['exactly', '1'],
   ['exactly', '2'],
   ['exactly', '3'],
   ['exactly', '4'],
   ['exactly', '5'],
   ['exactly', '6'],
   ['exactly', '7'],
   ['exactly', '8'],
   ['exactly', '9']]],
 ['rule',
  ['rule_name', 'space'],
  ['flags'],
  ['args'],
  ['or',
   ['exactly', '\t'],
   ['exactly', '\n'],
   ['exactly', '\r'],
   ['exactly', ' ']]],
 ['rule',
  ['rule_name', 'spaces'],
  ['flags'],
  ['args'],
  ['quantified', ['apply', 'space'], ['quantifier', '*']]]]
234 | 


--------------------------------------------------------------------------------
/pymetaterp/python.py:
--------------------------------------------------------------------------------
  1 | import boot_stackless as boot
  2 | reload(boot)
  3 | from boot_stackless import *
  4 | from pdb import set_trace as bp
  5 | from bisect import bisect_left as bisect
  6 | 
  7 | class Interpreter(boot.Interpreter):
  8 |     def match(self, root, input=None, pos=-1, locals=None, debug=False):
  9 |         self.indentation = [0]
 10 |         self.default_locals = self.locals = {} if locals is None else dict(locals)
 11 |         self.debug = debug
 12 |         self.memoizer = {}
 13 |         return boot.Interpreter.match(self, root, input, pos)
 14 | 
 15 |     def eval(self, root):
 16 |         self.locals['self'] = self
 17 |         output = eval(root, globals(), self.locals)
 18 |         del self.locals['self']
 19 |         return output
 20 | 
 21 |     def new_step(self):
 22 |         root = self.stack[-1].root
 23 |         name = root.name
 24 |         calls = self.stack[-1].calls
 25 |         if name in ["and", "args", "output", "or"]:
 26 |             if len(root) == 0 and name in ["and", "args", "output"]:
 27 |                 return
 28 |             calls.extend(root)
 29 |         elif name in ["lookahead"]:
 30 |             calls.append(root[0])
 31 |         elif name == "exactly":
 32 |             if pop(self.input) != root[0]:
 33 |                 return MatchError("Not exactly %s" % root[0])
 34 |             return root[0]
 35 |         elif name == "token":
 36 |             while pop(self.input) in ['\t', ' ', '\\']:
 37 |                 if self.input[0][self.input[1]] == '\\':
 38 |                     pop(self.input)
 39 |             if self.input[1] == len(self.input[0]):
 40 |                 return MatchError("EOF")
 41 |             self.input[1] -= 1
 42 |             for char in root[0]:
 43 |                 if pop(self.input) != char:
 44 |                     return MatchError("Not exactly %s" % root[0])
 45 |             if root[0].isalpha():
 46 |                 top = pop(self.input)
 47 |                 if top.isalnum() or top == '_':
 48 |                     return MatchError("Prefix matched but didn't end.")
 49 |                 self.input[1] -= 1
 50 |             return root[0]
 51 |         elif name == "apply":
 52 |             if self.debug:
 53 |                 print " "*len(self.stack), "applying", root[NAME], self.input[1], str(self.input[0][self.input[1]+1:self.input[1]+11])[:20]
 54 |             if root[NAME] == "anything":
 55 |                 return pop(self.input)
 56 |             elif root[NAME] == "void":
 57 |                 return
 58 |             else:
 59 |                 key = (root[NAME], id(self.input[0]), self.input[1],
 60 |                        tuple(self.indentation))
 61 |                 if key in self.memoizer:
 62 |                     self.input = self.memoizer[key][1][:]
 63 |                     return self.memoizer[key][0]
 64 |                 self.stack[-1].key = key
 65 |                 calls.append(self.rules[root[NAME]][BODY])
 66 |             self.stack[-1].locals = self.locals
 67 |             self.locals = dict(self.default_locals)
 68 |         elif name == "rule_value":
 69 |             return self.eval(root[0])
 70 |         elif name == "predicate":
 71 |             output = self.eval(root[0])
 72 |             if not output:
 73 |                 return MatchError("Predicate evaluates to false")
 74 |             elif output == True:
 75 |                 return None
 76 |             else:
 77 |                 return Node("predicate", [output])
 78 |         elif name == "action":
 79 |             self.locals['self'] = self
 80 |             exec(root[0], globals(), self.locals)
 81 |             del self.locals['self']
 82 |             return
 83 |         else:
 84 |             return boot.Interpreter.new_step(self)
 85 |         return Eval
 86 | 
 87 |     def next_step(self):
 88 |         frame = self.stack[-1]
 89 |         root = frame.root
 90 |         name = root.name
 91 |         outputs = frame.outputs
 92 |         output = outputs[-1] if outputs else None
 93 |         is_error = type(output) == MatchError
 94 |         finished = len(outputs) == len(frame.calls)
 95 |         if self.debug and name in ["apply"]:
 96 |             print " "*len(self.stack), name, "->", output
 97 |         if is_error and name not in ["quantified", "or", "negation", "apply"]:
 98 |             return output
 99 |         elif not (finished or name in ["or", "quantified"]):
100 |             return Eval
101 |         if name in ["and", "args", "output"]:
102 |             assert(len(outputs) == len(root))
103 |             if any(child.name == "output" for child in root):
104 |                 outputs = [output for child, output in zip(root, outputs)
105 |                            if child.name == "output"]
106 |             elif any(child.name == "rule_value" for child in root):
107 |                 outputs = [output for child, output in zip(root, outputs)
108 |                            if child.name == "rule_value"]
109 |                 assert(len(outputs) == 1)
110 |             return to_node(outputs, self.join_str)
111 |         elif name in "bound":
112 |             if root[1].name == "inline":
113 |                 return Node(root[1][0], to_list(output))
114 |             else: # bind
115 |                 self.locals[root[1][0]] = output
116 |                 return
117 |         elif name == "apply":
118 |             # Need to run this line even on error
119 |             self.locals = frame.locals
120 |             output = boot.Interpreter.next_step(self)
121 |             self.memoizer[frame.key] = (output, self.input[:])
122 |             return output
123 |         elif name == "lookahead":
124 |             self.input = frame.input[:]
125 |             return output
126 |         else:
127 |             return boot.Interpreter.next_step(self)
128 |         return Eval
129 | 
130 |     def st(self):
131 |         source = self.source
132 |         stack = self.stack
133 |         filename = getattr(self, "filename", "<grammar>")
134 |         source_lines = [line+"\n" for line in source.split("\n")]
135 |         line_len = [len(l) for l in source_lines]
136 |         source_line_num = [sum(line_len[:i+1]) for i in xrange(len(line_len))]
137 |         func_name = "None"
138 |         for i, frame in enumerate(stack):
139 |             line_num = [bisect(source_line_num, p) for p in frame.root.pos]
140 |             rel_pos = [p - (source_line_num[line_num[0]-1] if line_num[0] else 0)
141 |                        for p in frame.root.pos]
142 |             lines = "".join(source_lines[line_num[0]: line_num[1]+1])
143 |             print str(i).ljust(2) + " In file " + '\033[92m' + filename + '\033[0m' + " line " + str(line_num[0]) + " function " + '\033[92m' + str(func_name) + " (" + frame.root.name + ")" + '\033[0m'
144 |             print lines[:rel_pos[0]] + '\033[91m' + lines[rel_pos[0]: rel_pos[1]] + '\033[0m' + lines[rel_pos[1]:-1]
145 |             if frame.root.name == "apply":
146 |                 func_name = frame.root[0]
147 | 
def reformat_atom(atom, trailers):
    """Fold a sequence of trailers onto ``atom``, innermost first.

    Each trailer wraps the result so far in a new node: ``arglist`` gives
    ``__call__``, ``NAME`` gives ``__getattr__`` and ``subscriptlist`` gives
    ``__getitem__``. The new node spans from the start of the wrapped node
    to the end of the trailer. Any other trailer name raises ``Exception``.
    """
    result = atom
    for item in to_list(trailers):
        span = (result.pos[0], item.pos[1])
        kind = item.name
        if kind == "arglist":
            tag = "__call__"
            children = [result, item]
        elif kind == "NAME":
            tag = "__getattr__"
            attribute = Node("NAME", item, pos=item.pos)
            children = [result, attribute]
        elif kind == "subscriptlist":
            tag = "__getitem__"
            children = [result] + item
        else:
            raise Exception("Unknown trailer %s" % item.name)
        result = Node(tag, children, pos=span)
    return result
162 | 
# Binary operator tokens, one tuple per precedence level, loosest first.
binary_ops = (
    (">=", "<=", "<>", "<", ">", "==", "!=",
     "in", "not in", "is not", "is"),
    ("|",),
    ("^",),
    ("&",),
    ("<<", ">>"),
    ("+", "-"),
    ("*", "/", "%", "//"),
    ("**",),
)
# Operator token -> index of its precedence level in binary_ops.
priority = dict((op, level)
                for level, group in enumerate(binary_ops)
                for op in group)
# Every level except the first (comparison) one.
expr_ops = binary_ops[1:]
169 | 
def reformat_binary(start, oper_and_atoms):
    """Build a precedence-aware ``__binary__`` tree from a flat operand list.

    ``oper_and_atoms`` alternates operator, operand, operator, operand...
    where every operator is a container whose first item is the operator
    token. ``start`` is the leftmost operand and is returned unchanged when
    there are no operators.
    """
    if not oper_and_atoms:
        return start
    pairs = zip(oper_and_atoms[::2], oper_and_atoms[1::2])

    def climb(left, cursor=0):
        # Consume operators whose priority is at least that of the first
        # one seen; tighter-binding operators are folded into the right
        # operand by recursing.
        floor = priority[pairs[cursor][0][0]]
        while cursor < len(pairs):
            wrapped, right = pairs[cursor]
            assert(type(wrapped) != str)
            symbol = wrapped[0]
            if priority[symbol] < floor:
                break
            cursor += 1
            while cursor < len(pairs) and\
                  priority[pairs[cursor][0][0]] > priority[symbol]:
                right, cursor = climb(right, cursor)
            left = Node("__binary__", [symbol, left, right],
                        pos=(left.pos[0], right.pos[1]))
        return (left, cursor)

    tree, cursor = start, 0
    while cursor < len(pairs):
        tree, cursor = climb(tree, cursor)
    return tree
192 | 
def any_token(input, binary=True):
    """Consume one binary operator token from ``input`` if one is next.

    ``input`` is the mutable input state that ``pop`` advances; it is
    restored after every failed attempt and left past the token on success.
    With ``binary=False`` the first (comparison) level of ``binary_ops`` is
    excluded.

    Returns the matched token string, or ``False`` when none matches.
    """
    ops = binary_ops if binary else expr_ops
    # Try longer tokens first so that e.g. "//" is not consumed as "/" and
    # "<<" is not consumed as "<". sorted() is stable, so tokens of equal
    # length keep their original relative order.
    candidates = sorted((token for tokens in ops for token in tokens),
                        key=len, reverse=True)
    old_input = input[:]
    for token in candidates:
        if all(pop(input) == char for char in token):
            return token
        input[:] = old_input[:]
    return False
202 | 


--------------------------------------------------------------------------------
/pymetaterp/python_compiled.py:
--------------------------------------------------------------------------------
  1 | from boot_compiled import *
  2 | 
  3 | def eval_(expr):
  4 |     g.locals['self'] = g
  5 |     output = eval(expr, globals(), g.locals)
  6 |     del g.locals['self']
  7 |     return output
  8 | 
  9 | def lookahead(child):
 10 |     saved = g.input.position
 11 |     output = child()
 12 |     g.input.position = saved
 13 |     return output
 14 | 
 15 | def token(s):
 16 |     while g.input.next() in ['\t', '\n', '\r', ' ']:
 17 |         pass
 18 |     g.input.position -= 1
 19 |     for char in s:
 20 |         if g.input.next() != char:
 21 |             return MatchError("Not exactly %s" % char)
 22 |     if char.isalpha():
 23 |         top = g.input.next()
 24 |         if top.isalnum() or top == '_':
 25 |             return MatchError("Prefix matched but didn't end.")
 26 |         g.input.position -= 1
 27 |     return s
 28 | 
 29 | def and_(children):
 30 |     saved = g.input.position
 31 |     outputs = []
 32 |     output_mode = None
 33 |     for child in children:
 34 |         output = child()
 35 |         if isinstance(output, MatchError):
 36 |             g.input.position = saved
 37 |             return MatchError("And match failed")
 38 |         if output_mode:
 39 |             if getattr(output, "name", None) == output_mode:
 40 |                 outputs.extend(to_list(output.children))
 41 |         else:
 42 |             if getattr(output, "name", None) == "out":
 43 |                 outputs = to_list(output.children)
 44 |                 output_mode = "out"
 45 |             elif getattr(output, "name", None) == "rule_value":
 46 |                 outputs = to_list(output.children)
 47 |                 output_mode = "rule_value"
 48 |             else:
 49 |                 outputs.extend(to_list(output))
 50 |     return "".join(outputs) if outputs and all(type(output) == str for output in outputs) and len(outputs[0]) == 1\
 51 |         else outputs
 52 | 
 53 | # Not a rule! Should rename this node to just 'value'?
 54 | def rule_value(expr):
 55 |     # Not normally wrapped in Node, need to rethink!
 56 |     return Node("rule_value", eval_(expr))
 57 | 
 58 | def predicate(expr):
 59 |     output = eval_(expr)
 60 |     if not output:
 61 |         return MatchError("Predicate evaluates to false")
 62 |     elif output == True:
 63 |         return None
 64 |     else:
 65 |         return Node("predicate", [output])
 66 | 
 67 | def action(expr):
 68 |     g.locals['self'] = g
 69 |     exec(expr, globals(), g.locals)
 70 |     del g.locals['self']
 71 |     return
 72 | 
 73 | def bound(child, (type, name)):
 74 |     saved = g.input.position
 75 |     output = child()
 76 |     if type == "inline":
 77 |         return output if isinstance(output, MatchError) else\
 78 |             Node(name, output, (saved+1, g.input.position+1))
 79 |     else: # bind
 80 |         g.locals[name] = output
 81 | 
 82 | def apply_(name):
 83 |     if g.debug:
 84 |         print " "*g.nest, name, g.input.source[g.input.position+1: g.input.position+10]
 85 |     key = (name, id(g.input.source), g.input.position, tuple(g.indentation))
 86 |     # Should also memoize output indentation!
 87 |     if key in g.memoizer:
 88 |         g.input.source, g.input.position = g.memoizer[key][1][:]
 89 |         return g.memoizer[key][0]
 90 |     saved_locals = g.locals
 91 |     g.locals = g.default_locals
 92 |     # func, flagged
 93 |     g.nest += 1
 94 |     saved = g.input.position
 95 |     output = g.rules[name][0]()
 96 |     g.nest -= 1
 97 |     if g.debug:
 98 |         print " "*g.nest, name, "->", output
 99 |     g.locals = saved_locals
100 |     if (not isinstance(output, MatchError) and "!" in g.rules[name][1]) or\
101 |        (isinstance(output, list) and len(output) > 1):
102 |         output = Node(name, output, (saved+1, g.input.position+1))
103 |     g.memoizer[key] = (output, [g.input.source, g.input.position])
104 |     return output
105 | 
def rule_void():
    """Match nothing and produce no output."""
    return None
108 | 
def reformat_atom(atom, trailers):
    """Fold a sequence of trailers onto ``atom``, innermost first.

    Each trailer wraps the result so far in a new node: ``arglist`` gives
    ``__call__``, ``NAME`` gives ``__getattr__`` and ``subscriptlist`` gives
    ``__getitem__``. The new node spans from the start of the wrapped node
    to the end of the trailer. Any other trailer name raises ``Exception``.
    """
    # The debugger breakpoint that used to fire whenever trailers were
    # present has been removed; it halted every parse of a call, attribute
    # access or subscript.
    output = atom
    for trailer in trailers:
        pos = (output.pos[0], trailer.pos[1])
        if trailer.name == "arglist":
            output = Node("__call__", [output, trailer], pos=pos)
        elif trailer.name == "NAME":
            output = Node("__getattr__", [output, Node("NAME", trailer,
                                                       pos=trailer.pos)], pos=pos)
        elif trailer.name == "subscriptlist":
            output = Node("__getitem__", [output] + trailer, pos=pos)
        else:
            raise Exception("Unknown trailer %s" % trailer.name)
    return output
125 | 
126 | 
# Binary operator tokens, one tuple per precedence level, loosest first.
binary_ops = (
    (">=", "<=", "<>", "<", ">", "==", "!=",
     "in", "not in", "is not", "is"),
    ("|",),
    ("^",),
    ("&",),
    ("<<", ">>"),
    ("+", "-"),
    ("*", "/", "%", "//"),
    ("**",),
)
# Operator token -> index of its precedence level in binary_ops.
priority = dict((op, level)
                for level, group in enumerate(binary_ops)
                for op in group)
# Every level except the first (comparison) one.
expr_ops = binary_ops[1:]
133 | 
def reformat_binary(start, oper_and_atoms):
    """Build a precedence-aware ``__binary__`` tree from a flat operand list.

    ``oper_and_atoms`` alternates operator, operand, operator, operand...
    where every operator is a container whose first item is the operator
    token. When there are no operators ``start`` is returned unchanged;
    otherwise ``start[0]`` is used as the leftmost operand.
    """
    if not oper_and_atoms:
        return start
    pairs = zip(oper_and_atoms[::2], oper_and_atoms[1::2])

    def climb(left, cursor=0):
        # Consume operators whose priority is at least that of the first
        # one seen; tighter-binding operators are folded into the right
        # operand by recursing.
        floor = priority[pairs[cursor][0][0]]
        while cursor < len(pairs):
            wrapped, right = pairs[cursor]
            assert(type(wrapped) != str)
            symbol = wrapped[0]
            if priority[symbol] < floor:
                break
            cursor += 1
            while cursor < len(pairs) and\
                  priority[pairs[cursor][0][0]] > priority[symbol]:
                right, cursor = climb(right, cursor)
            left = Node("__binary__", [symbol, left, right],
                        pos=(left.pos[0], right.pos[1]))
        return (left, cursor)

    tree, cursor = start[0], 0
    while cursor < len(pairs):
        tree, cursor = climb(tree, cursor)
    return tree
156 | 
def any_token(input, binary=True):
    """Consume one binary operator token from ``g.input`` if one is next.

    The ``input`` argument is not used; the global input ``g.input`` is
    read instead. Its position is restored after every failed attempt and
    left past the token on success. With ``binary=False`` the first
    (comparison) level of ``binary_ops`` is excluded.

    Returns the matched token string, or ``False`` when none matches.
    """
    ops = binary_ops if binary else expr_ops
    # Try longer tokens first so that e.g. "//" is not consumed as "/" and
    # "<<" is not consumed as "<". sorted() is stable, so tokens of equal
    # length keep their original relative order.
    candidates = sorted((token for tokens in ops for token in tokens),
                        key=len, reverse=True)
    old_input = g.input.position
    for token in candidates:
        if all(g.input.next() == char for char in token):
            return token
        g.input.position = old_input
    return False
166 | 
def match(tree, inp, debug=False, locals=None):
    """Reset the global parser state ``g``, load ``tree`` and parse ``inp``.

    ``locals`` (when given) is copied and used as both the current and the
    default variable scope. ``debug`` is stored on ``g.debug``.
    Returns whatever ``rule_grammar()`` returns.
    """
    # Built-in rules, stored as name -> (function, flags).
    g.rules = {'anything': (rule_anything, ''), 'letter': (rule_letter, ''),
               'digit': (rule_digit, ''), 'void': (rule_void, ''),}
    g.indentation = [0]
    g.memoizer = {}
    g.locals = g.default_locals = {} if locals is None else dict(locals)
    g.nest = 0
    g.debug = debug
    # NOTE(review): to_python(tree) presumably returns Python source that
    # defines the rule functions, including the rule_grammar called below --
    # confirm against boot_compiled.
    exec to_python(tree)
    g.input = Source(inp)
    return rule_grammar()
178 | 


--------------------------------------------------------------------------------
/pymetaterp/python_grammar.py:
--------------------------------------------------------------------------------
  1 | full_definition = r"""
  2 | comment = ('#' {(~'\n' {anything})*})=comment
  3 | hspaces = (' ' | '\t' | escaped_linebreak)*
  4 | hspacesp = (' ' | '\t' | escaped_linebreak)+
  5 | escaped_linebreak = '\\' {'\n'}
  6 | 
  7 | single_input = EMPTY_LINE | simple_stmt | (compound_stmt EMPTY_LINE)
  8 | file_input = (EMPTY_LINE | SAME_INDENT stmt)* ENDMARKER
  9 | eval_input = testlist NEWLINE? EMPTY_LINE* ENDMARKER
 10 | 
 11 | decorator! = "@" {dotted_name ("(" {arglist} ")")?} NEWLINE
 12 | decorators! = decorator+
 13 | decorated = decorators (classdef | funcdef)
 14 | funcdef = "def" {NAME} "(" {parameters | void=parameters} ")" ":" {suite}
 15 | # Check order validity elsewhere (at most one remaining_args and one kwargs)
 16 | parameters! = {fpdef_opt (comma {fpdef_opt})*} comma?
 17 | 
 18 | fpdef = NAME | "(" fplist ")"
 19 | fpdef_opt = fpdef ("=" {test})? | "*" {NAME=remaining_args} | "**" {NAME=kwargs}
 20 | fplist = {fpdef (comma {fpdef})*} comma?
 21 | 
 22 | stmt = compound_stmt | simple_stmt
 23 | simple_stmt = {small_stmt (";" {small_stmt})*} ";"? NEWLINE
 24 | small_stmt = print_stmt | del_stmt | pass_stmt | flow_stmt | comment
 25 |            | import_stmt | global_stmt | exec_stmt | assert_stmt | expr_stmt
 26 | 
 27 | expr_stmt = aug_assign | regular_assign | testlist
 28 | aug_assign_symbol = "+=" | "-=" | "*=" | "/=" | "%=" | "&="
 29 |                   | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//="
 30 | aug_assign = testlist aug_assign_symbol=operation (yield_expr|testlist)
 31 | regular_assign = testlist ("=" {yield_expr|testlist})+
 32 | # For normal assignments, additional restrictions enforced by the interpreter
 33 | print_stmt! = "print" { {test ("," {test})*} ","?
 34 |                       | ">>" test ( ("," test)+ ","? )? | void}
 35 | del_stmt! = "del" hspacesp {exprlist}
 36 | pass_stmt! = "pass" {}
 37 | flow_stmt = break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
 38 | break_stmt! = "break" {}
 39 | continue_stmt! = "continue" {}
 40 | return_stmt! = "return" {testlist?}
 41 | yield_stmt = yield_expr
 42 | raise_stmt! = "raise" {(test ("," test ("," test))?)?}
 43 | import_stmt = simport_stmt | import_name | import_from
 44 | simport_stmt! = "simport" {NAME}
 45 | import_name = "import" {import_names}
 46 | import_names! = dotted_as_name ("," {dotted_as_name})*
 47 | import_from! = "from" {"."* dotted_name | "."+}
 48 |                "import" {"*"=import_all | "(" {import_as_names} ")" | import_as_names}
 49 | import_as_name = NAME ("as" {NAME})?
 50 | dotted_as_name = dotted_name ("as" {NAME})?
 51 | import_as_names! = {import_as_name ("," {import_as_name})*} ","?
 52 | dotted_name = NAME ("." {NAME})*
 53 | global_stmt = "global" NAME ("," NAME)*
 54 | exec_stmt! = "exec" {expr ("in" {test} ("," {test})?)?}
 55 | assert_stmt! = "assert" {test ("," test)?}
 56 | 
 57 | compound_stmt = if_stmt | while_true_stmt=while_true | while_stmt
 58 |               | simple_for_stmt | for_stmt | try_stmt | with_stmt
 59 |               | funcdef | classdef | decorated
 60 | if_stmt = ("if" {test} ":" {suite})=single_if
 61 |           ((SAME_INDENT "elif" {test} ":" {suite})=single_if)*
 62 |           ((SAME_INDENT "else" ":" {void=gen_true suite})=single_if)?
 63 | while_true_stmt = "while_true" ":" {suite}
 64 | while_stmt = "while" {test} ":" {suite (SAME_INDENT "else" ":" {suite})?}
 65 | for_stmt = "for" {exprlist} "in" {testlist} ":" {suite} {(SAME_INDENT "else" ":" {suite})?}
 66 | simple_for_stmt = "simple_for" {exprlist} "in" {testlist} ":" {suite}
 67 | try_stmt! = "try" ":" {suite}
 68 |             {((SAME_INDENT {exception} ":" {suite})=except_clause)+=except_clauses
 69 |              (SAME_INDENT  "else" ":" suite)?
 70 |              (SAME_INDENT "finally" ":" suite)?
 71 |              | SAME_INDENT "finally" ":" suite}
 72 | with_stmt = "with" with_item ("," with_item)* ":" suite
 73 | with_item = test ("as" expr)?
 74 | # NB compile.c makes sure that the default except clause is last
 75 | exception! = "except" {(test (("as" | ",") {test})?)?}
 76 | # Should "give back" the consumed empty lines at the end!
 77 | suite = NEWLINE INDENT {(SAME_INDENT stmt | EMPTY_LINE)+} DEDENT
 78 |       | simple_stmt
 79 | 
 80 | testlist = {test ("," {test})*} ","?
 81 | yield_expr! = "yield" {testlist?}
 82 | 
 83 | test = lambdef | or_test ("if" {or_test} {("else" {test})?})?
 84 | or_test = and_test ("or" {and_test})*
 85 | and_test = not_test ("and" {not_test})*
 86 | not_test = ("not" {not_test})=not_test | comparison
 87 | 
 88 | comparison = factor:start (hspaces {?(any_token(self.input))}
 89 |                            hspaces {factor})*:oper_and_atoms
 90 |              -> reformat_binary(start, oper_and_atoms)
 91 | expr = factor:start (hspaces {?(any_token(self.input, binary=False))}
 92 |                      hspaces {factor})*:oper_and_atoms
 93 |      -> reformat_binary(start, oper_and_atoms)
 94 | 
 95 | factor = ("+"|"-"|"~")* power
 96 | power = trailed_atom ("**" factor)?
 97 | trailed_atom = atom:atom trailer*:trailers -> reformat_atom(atom, trailers)
 98 | atom = "(" spaces {parenthesis} spaces ")"
 99 |      | "[" spaces {listmaker | void=listmaker} spaces "]"
100 |      | "{" spaces {dictmaker} spaces "}"
101 |      | "{" {setmaker} spaces "}"
102 |      | "`" {(stmt | small_stmt)=thunk} "`"
103 |      | STRINGS | NAME | NUMBER
104 | parenthesis = yield_expr | testlist_comp=generator | tuple 
105 |             | test | void=no_param
106 | listmaker! = (test list_for list_iter*)=listcomp
107 |            | {test (comma {test})*} comma?
108 | testlist_comp = test list_for list_iter*
109 | tuple! = ({test} comma)+ test?
110 | lambdef! = "lambda" {parameters? | void=parameters} ":" {test}
111 | trailer = "(" spaces {arglist} spaces ")"
112 |         | "[" spaces {subscriptlist=subscriptlist} spaces "]"
113 |         | "." {NAME}
114 | subscriptlist! = subscript ("," {subscript})* ","?
115 | subscript! = "..."=ellipsis | ({test?=start} ":" {test?=stop} {step?})=slice | test
116 | exprlist = {expr ("," {expr})*} ","?
117 | step! = ":" {test?}
118 | dictmaker! = ({test} ":" {test} {list_for} {list_iter*})=dictcomp
119 |            | {({test} ":" {test})=pair ((comma {test} ":" {test})=pair)*} comma?
120 |            | void
121 | 
122 | setmaker! = test (list_for list_iter* | (("," test)* ","?))
123 | 
124 | classdef = "class" {NAME} {("(" {testlist?} ")")?=parents} ":" {suite}
125 | 
126 | arglist! = ({argument} comma)* ( "**" {test=kwargs}
127 |                                | "*" {test=remaining_args ("," keyword_arg)* ("," "**" {test=kwargs})?}
128 |                                | {argument | void} )
129 |                        comma?
130 | 
131 | comma = "," spaces
132 | 
133 | argument = keyword_arg | listcomp_arg
134 | keyword_arg = {test} "=" {test}
135 | listcomp_arg = test (list_for list_iter*)?
136 | 
137 | list_iter = list_for | list_if
138 | list_for = spaces "for" {exprlist} "in" {or_test} # {testlist_safe}
139 | list_if! = spaces "if" {or_test}
140 | 
141 | testlist_safe = or_test ((',' or_test)+ ','?)?
142 | testlist1 = test ("," test)*
143 | 
144 | NUMBER! = hspaces digit+:s -> int("".join(n[0] for n in s))
145 | # Probably need to check that the result isn't a reserved word.
146 | NAME! = hspaces {((letter | '_') (letter | digit | '_')*)}
147 | STRINGS = {STRING | RAW_STRING=STRING} (spaces {STRING | RAW_STRING=STRING})*
148 | STRING! = hspaces stype? '"' '"' '"' {(escaped_char | ~('"' '"' '"') {anything})*} '"' '"' '"'
149 |        | hspaces stype? '\'' {(escaped_char | ~'\'' anything)*} '\''
150 |        | hspaces stype? '"' {(escaped_char | ~'"' anything)*} '"'
151 | RAW_STRING = hspaces 'r' '"' '"' '"' {(~('"' '"' '"') {anything})*} '"' '"' '"'
152 |            | hspaces 'r' '\'' {(~'\'' anything)*} '\''
153 |            | hspaces 'r' '"' {(~'"' anything)*} '"'
154 | stype! = 'b'
155 | escaped_char! = '\\' {'n'|'r'|'t'|'b'|'f'|'"'|'\''|'\\'}
156 | EMPTY_LINE = (hspaces comment? ('\n' | '\r'))=EMPTY_LINE
157 | NEWLINE = hspaces (comment hspaces)? ('\n' | '\r')
158 | SAME_INDENT = hspaces:s ?(self.indentation[-1] == (len(s) if s != None else 0))
159 | ENDMARKER = ~anything
160 | INDENT = ~~hspaces:s !(self.indentation.append(len(s) if s != None else 0))
161 | DEDENT = !(self.indentation.pop())
162 | 
163 | grammar = file_input
164 | """
165 | 
# Supporting rules: letter, digit, space (including comments) and spaces.
# Unlike full_definition this is NOT a raw string, so Python itself turns
# '\t', '\n' and '\r' into the actual characters before the grammar text is
# parsed.
# NOTE(review): presumably concatenated with full_definition by the caller
# -- confirm at the call site.
extra = """
letter = 'a'|'b'|'c'|'d'|'e'|'f'|'g'|'h'|'i'|'j'|'k'|'l'|'m'|'n'|'o'|'p'|'q'|'r'|'s'|'t'|'u'|'v'|'w'|'x'|'y'|'z'|'A'|'B'|'C'|'D'|'E'|'F'|'G'|'H'|'I'|'J'|'K'|'L'|'M'|'N'|'O'|'P'|'Q'|'R'|'S'|'T'|'U'|'V'|'W'|'X'|'Y'|'Z'
digit = '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
space = '\t'|'\n'|'\r'|' '|comment
spaces = space*
"""
172 | 


--------------------------------------------------------------------------------
/pymetaterp/util.py:
--------------------------------------------------------------------------------
 1 | def simple_wrap_tree(root):
 2 |     if type(root) != list:
 3 |         return root
 4 |     return Node(root[0], map(simple_wrap_tree, root[1:]))
 5 | 
 6 | class MatchError(Exception):
 7 |     pass
 8 | 
 9 | class Node(list):
10 |     def __init__(self, name=None, value=None, params=None, **kw):
11 |         list.__init__(self, value if value is not None else [])
12 |         self.name = name
13 |         self.params = params if params is not None else {}
14 |         for key, value in kw.items():
15 |             setattr(self, key, value)
16 | 
17 |     def __repr__(self):
18 |         return "%s%s" % (self.name, list.__repr__(self))
19 | 
20 |     def pprint(self, max_depth=None, max_width=None, indent=0, filter=None):
21 |         if max_depth and indent/2 > max_depth:
22 |             return
23 |         print_node = bool(filter is None or filter(self))
24 |         if print_node:
25 |             print " "*indent + self.name
26 |         for child in self:
27 |             if not hasattr(child, "pprint"):
28 |                 if print_node:
29 |                     print "%s%s %s" % (" "*(indent + 2), type(child).__name__,
30 |                                        repr(child))
31 |             else:
32 |                 child.pprint(max_depth, max_width, indent + 2*print_node, filter)
33 | 
34 |     def save(self, filename="tree.py"):
35 |         from pprint import pprint
36 |         f = open(filename, "w")
37 |         f.write("tree = ")
38 |         pprint(self.to_list(), f)
39 | 
40 |     def to_list(self):
41 |         return [self.name] + [elem.to_list() if hasattr(elem, "name") else elem
42 | 
43 |                               for elem in self]
44 | 
45 |     def to_lisp(self):
46 |         return "(%s)" % " ".join([self.name] +\
47 |                                  [elem.to_lisp() if hasattr(elem, "name") else\
48 |                                   repr(elem).replace("'", '"') if elem != '"' else '"\\""'
49 |                                   for elem in self])
50 | 
51 |     @property
52 |     def descendants(self):
53 |         for child in self:
54 |             if type(child) == Node:
55 |                 for gc in child.descendants:
56 |                     yield gc
57 |             yield child
58 | 
def compare_trees(t1, t2, indices):
    """Descend both trees along `indices`, then compare their children pairwise.

    Returns a list with one `equal_trees` result for each pair of children
    found at the same position under the two selected subtrees.
    """
    left, right = t1, t2
    for index in indices:
        left = left[index]
        right = right[index]
    results = []
    for child1, child2 in zip(left, right):
        results.append(equal_trees(child1, child2))
    return results
64 | 
def equal_trees(t1, t2):
    """Return True when two trees have the same names, shape and leaves.

    If either argument is not exactly a `Node`, the two are compared with
    `==`.  Two nodes are equal when their names match, they have the same
    number of children, and the children are pairwise equal.
    """
    if type(t1) != Node or type(t2) != Node:
        return t1 == t2
    # zip() stops at the shorter sequence, so the child counts are compared
    # explicitly; otherwise a node would equal any node it is a prefix of.
    return t1.name == t2.name and len(t1) == len(t2) and\
        all(equal_trees(c1, c2) for c1, c2 in zip(t1, t2))
70 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
  1 | # pymetaterp
  2 | 
  3 | This is a python AST builder that uses no Python modules. A longer stackless version is available for easier porting. `single_file.py` is a stand-alone 502-line script.
  4 | 
  5 | It's (also) just another parsing expression grammar (PEG) based parser, with one major difference: the parsed grammar is interpreted instead of compiled. This makes it easy to modify the language (by editing its grammar) *as well as* the language that grammar is written in (and the language of *that* grammar).
  6 | 
  7 | [This is a **pre-release** of sorts. There are probably some errors and missing information.]
  8 | 
  9 | ## Download and run
 10 | 
 11 |     git clone https://github.com/asrp/pymetaterp
 12 |     cd pymetaterp
 13 |     python single_file.py
 14 | 
 15 | or
 16 | 
 17 |     python single_file.py filename.py
 18 | 
 19 | This will print out the AST of the given file (or `single_file.py`'s own AST). Sample beginning of the output:
 20 | 
 21 |     file_input
 22 |       regular_assign
 23 |         testlist
 24 |           NAME
 25 |             str 'NAME'
 26 |           NAME
 27 |             str 'FLAGS'
 28 | 
 29 | To run files from the library
 30 | 
 31 |     python test/boot_test.py 
 32 |     python test/python_parse_test.py 
 33 | 
 34 | ## Files
 35 | 
 36 | `single_file.py` is mainly for demonstration. This module is otherwise separated into files. There are many files but they are mostly separate. The import dependencies are
 37 | 
 38 |     util.py
 39 |       boot.py
 40 |       boot_stackless.py
 41 |         python.py
 42 | 
 43 | Other files have no imports. To get something useful, you'll have to import multiple files. See `test/python_parse_test.py` and `test/boot_test.py` for some examples.
 44 | 
 45 | ## Repl
 46 | 
 47 | An obvious thing *missing* is the grammar read-eval-print loop (repl) so the interpreter can be fed one rule at a time, parsing subsequent input using the rules seen so far.
 48 | 
 49 | ## Source reading order
 50 | 
 51 | I'd suggest reading `boot.py` and `bootstrap` in `boot_grammar.py` first. The two form the core and together with `boot_tree.py`, they can regenerate `boot_tree`.
 52 | 
 53 | Then `boot_stackless` is the same as `boot.py` but doesn't use the Python call stack/recursion for parsing.
 54 | 
 55 | `python.py` adds functionality to the `boot.py` interpreter. `diff` in `boot_grammar.py` adds the syntax for those.
 56 | 
 57 | Finally, `python_grammar.py` contains the python grammar to be finally parsed.
 58 | 
 59 | ## Python language parsed
 60 | 
 61 | The module builds the AST for Python 2.x programs. It is able to parse all of Python 2.x (in fact, it contains a slightly modified version of the Python 2.x grammar) but is less lenient with whitespaces. For example, parsing
 62 | 
 63 |     from my_module import (var1, var2,
 64 |                            var3, var4)
 65 | 
 66 | gives an error.
 67 | 
 68 | *Since this is a pre-release, there are likely bugs with parts of the language I don't use so often. It _can_ build the AST for all files included here.* 
 69 | 
 70 | ## Grammar language differences
 71 | 
 72 | The beginning of `boot_grammar.py` self-describes the grammar. It's a PEG, so every "or" (`|`) returns its first match, and "and" and "quantified" (`*, +, ?`) are greedy.
 73 | 
 74 |     name = (letter | '_') (letter | digit | '_')*
 75 |     expr = apply | exactly | token | parenthesis | output
 76 | 
 77 |     exactly! = "'" {(escaped_char | ~'\'' anything)*} "'"
 78 |     token! = "\"" {(escaped_char | ~'"' anything)*} "\""
 79 |     escaped_char! = '\\' {'n'|'r'|'t'|'b'|'f'|'"'|'\''|'\\'}
 80 |     apply! = ('\t'|' ')* {name}
 81 |     parenthesis = "(" {or} ")"
 82 |     output! = "{" {or} "}"
 83 | 
 84 |     not = "~" {expr=negation} | expr
 85 |     quantified = not (('*' | '+' | '?')=quantifier)?
 86 |     bound = quantified ('=' {name=inline})?
 87 |     and = bound*
 88 |     or = and ("|" {and})*
 89 | 
 90 |     rule = spaces {name=rule_name '!'?=flags and=args ("=" {or})}
 91 |     grammar = {rule*} spaces
 92 | 
 93 | The main differences from other PEGs:
 94 | 
 95 | - output rule: `a {b c} d` will match the concatenation of `a b c d` but only return what matched `b c`.
 96 | - quantifier collapse: `letter letter*` returns a list rather than a pair with the second element being a list matching `letter*`.
 97 | - nested and collapse: `a (b (c d)) e` has the same output as `a b c d e` (see inline below if some pairs need to be explicitly grouped).
 98 | - node collapsing: nodes in the output with only one child are replaced by their parent, unless the `!` ("don't collapse") flag is set for that node.
 99 | - inline: shamelessly taken from [Ohm](https://github.com/ohm) but with a slightly different interpretation. `expression=name` creates a node named `name` wrapping the output of `expression`.
100 | - rule replacement: having a second `rule_name = expression` line replaces the first definition of `rule_name` (instead of appending into an or).
101 | - two basic tokens: there are two basic token types: `'a'` (single quote) and `"a"` (double quote). The double quoted token allows whitespace before matching.
102 | 
103 | ## Regenerating boot_tree.py
104 | 
105 | Create some tree `match_tree` using `Interpreter.match` and call `save` on the result.
106 | 
107 |     match_tree.save("tree.py")
108 | 
109 | ## Left recursion
110 | 
111 | While "[PEG/packrat parsers can support left-recursion](http://www.vpri.org/pdf/tr2007002_packrat.pdf)", the tree output isn't the one we want. The python functions `reformat_binary` and `reformat_atom` fix a parsed tree's output.
112 | 
113 | ## Source oddities
114 | 
115 | ### Two hard-coded rules
116 | 
117 |             if root[NAME] == "anything":
118 |                 return pop(self.input)
119 |             elif root[NAME] == "void":
120 |                 return
121 | 
122 | ### Hard-coded semantics for tokens
123 | 
124 |             if name == "token":
125 |                 while pop(self.input) in self.whitespace:
126 |                     if self.input[0][self.input[1]] == '\\':
127 |                         pop(self.input)
128 |                 self.input[1] -= 1
129 |             if name == "token" and root[0].isalpha():
130 |                 top = pop(self.input)
131 |                 if top.isalnum() or top == '_':
132 |                     raise MatchError("Prefix matched but didn't end.")
133 |                 self.input[1] -= 1
134 | 
135 | ## Optimization
136 | 
137 | Some effort was made to make these files short (especially `single_file.py`) but not too much. There are still some asserts around and commented print statements that can be useful for debugging. The final goal is, of course, to reduce the program's complexity and verbosity, not its line count.
138 | 
139 | ## Missing features
140 | 
141 | Features/bloat from a longer version of this program not (yet?) moved over:
142 | 
143 | - Debugging tree of nodes visited and their input and output
144 | - Function arguments (it's in the grammar but not the interpreter)
145 | - Nested list inputs (it's also in the grammar but not the interpreter)
146 | - name, args, flags, body as parameters instead of positional children
147 | - ~~Memoization~~
148 | - ~~Matched input start and end positions~~
149 | - Exact python expression matching for predicate, action and rule value. `balanced` is used as a simpler heuristic for now.
150 | 
151 | ## Removing features
152 | 
153 | To get a smaller file with just the basics.
154 | 
155 |     patch -R pymetaterp/python.py < patches/python_pos.patch
156 |     patch -R pymetaterp/python.py < patches/python_memoizer.patch
157 |     patch -R pymetaterp/boot_stackless.py < patches/boot_pos.patch
158 |     patch -R pymetaterp/boot_stackless.py < patches/boot_memoizer.patch
159 | 
160 | ## Readings
161 | 
162 | - [Ometa](http://www.tinlizzie.org/ometa/) - Warth's thesis reads very well.
163 | - [PEG and packrat parser](http://bford.info/packrat/)
164 | - [Packrat Parsers Can Support Left Recursion](http://www.vpri.org/pdf/tr2007002_packrat.pdf)
165 | 
166 | ## Other similar projects
167 | 
168 | - [parsimonious](https://github.com/erikrose/parsimonious)
169 | - [Pymeta](https://pypi.python.org/pypi/PyMeta/)
170 | - [pyparsing](http://pyparsing.wikispaces.com/)
171 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
# Distribution metadata for setuptools: a single package, `pymetaterp`, with
# no dependencies declared here.
setup(name='pymetaterp',
      version='1.0',
      description='A python parser that builds python ASTs in 502 lines of python without using modules',
      url='https://github.com/asrp/pymetaterp',
      author='asrp',
      author_email='asrp@email.com',
      packages=['pymetaterp'],
      keywords='parser peg python minimal')
10 | 


--------------------------------------------------------------------------------
/single_file.py:
--------------------------------------------------------------------------------
# Child positions inside a grammar rule node (rule name, flags, args, body);
# used below as `rule[NAME][0]`, `self.rules[...][FLAGS]` and
# `self.rules[...][BODY]`.  `root[NAME]` also reads the rule name of an
# `apply` node.
NAME, FLAGS, ARGS, BODY = [0, 1, 2, 3]
# Upper repetition bound for the unbounded quantifiers `*` and `+`.
inf = float("inf")
  3 | 
  4 | class MatchError(Exception):
  5 |     pass
  6 | 
  7 | class Node(list):
  8 |     def __init__(self, name=None, value=None):
  9 |         list.__init__(self, value if value is not None else [])
 10 |         self.name = name
 11 | 
 12 |     def __repr__(self):
 13 |         return "%s%s" % (self.name, list.__repr__(self))
 14 | 
 15 |     def pprint(self, indent=0):
 16 |         print " "*indent + self.name
 17 |         for child in self:
 18 |             if not hasattr(child, "pprint"):
 19 |                 print " "*(indent + 1), type(child).__name__, repr(child)
 20 |             else:
 21 |                 child.pprint(indent + 2)
 22 | 
 23 | def simple_wrap_tree(root):
 24 |     if type(root) != list:
 25 |         return root
 26 |     return Node(root[0], map(simple_wrap_tree, root[1:]))
 27 | 
 28 | def pop(input):
 29 |     input[1] += 1
 30 |     try:
 31 |         return input[0][input[1]]
 32 |     except IndexError:
 33 |         raise MatchError("EOF")
 34 | 
 35 | def to_list(output):
 36 |     return output  if getattr(output, "name", None) == "And" else\
 37 |            []      if output is None else\
 38 |            [output]
 39 | 
 40 | def to_node(outputs):
 41 |     outputs = [elem for output in outputs for elem in to_list(output)]
 42 |     return outputs[0]       if len(outputs) == 1 else\
 43 |            None             if len(outputs) == 0 else\
 44 |            "".join(outputs) if all(type(output) == str for output in outputs)\
 45 |            else Node("And", outputs)
 46 | 
 47 | class Interpreter:
 48 |     def __init__(self, grammar_tree, whitespace="\t\n\r \\"):
 49 |         self.rules = {rule[NAME][0]:rule for rule in grammar_tree}
 50 |         self.whitespace = whitespace
 51 | 
 52 |     def eval(self, root):
 53 |         self.locals['self'] = self
 54 |         output = eval(root, globals(), self.locals)
 55 |         del self.locals['self']
 56 |         return output
 57 | 
 58 |     def match(self, root, new_input=None, new_pos=-1):
 59 |         """ >>> g.match(g.rules['grammar'][-1], "x='y'") """
 60 |         if new_input is not None:
 61 |             self.input = [new_input, new_pos]
 62 |             self.indentation = [0]
 63 |             self.locals = {}
 64 |         old_input = self.input[:]
 65 |         name = root.name
 66 |         if name in ["and", "args", "output"]:
 67 |             outputs = [self.match(child) for child in root]
 68 |             if any(child.name == "output" for child in root):
 69 |                 outputs = [output for child, output in zip(root, outputs)
 70 |                            if child.name == "output"]
 71 |             elif any(child.name == "rule_value" for child in root):
 72 |                 outputs = [output for child, output in zip(root, outputs)
 73 |                            if child.name == "rule_value"]
 74 |                 assert(len(outputs) == 1)
 75 |         elif name == "quantified":
 76 |             assert(root[1].name == "quantifier")
 77 |             lower, upper = {"*": (0, inf), "+": (1, inf), "?": (0, 1)}[root[1][0]]
 78 |             outputs = []
 79 |             while len(outputs) < upper:
 80 |                 last_input = self.input[:]
 81 |                 try:
 82 |                     outputs.append(self.match(root[0]))
 83 |                 except MatchError:
 84 |                     self.input = last_input[:]
 85 |                     break
 86 |             if lower > len(outputs):
 87 |                 raise MatchError("Matched %s < %s times" % (len(outputs), lower))
 88 |         elif name == "or":
 89 |             for child in root:
 90 |                 try:
 91 |                     return self.match(child)
 92 |                 except MatchError:
 93 |                     self.input = old_input[:]
 94 |             raise MatchError("All Or matches failed")
 95 |         elif name in ["exactly", "token"]:
 96 |             if name == "token":
 97 |                 while pop(self.input) in self.whitespace:
 98 |                     if self.input[0][self.input[1]] == '\\':
 99 |                         pop(self.input)
100 |                 self.input[1] -= 1
101 |             for char in root[0]:
102 |                 if pop(self.input) != char:
103 |                     raise MatchError("Not exactly %s" % root[0])
104 |             if name == "token" and root[0].isalpha():
105 |                 top = pop(self.input)
106 |                 if top.isalnum() or top == '_':
107 |                     raise MatchError("Prefix matched but didn't end.")
108 |                 self.input[1] -= 1
109 |             return root[0]
110 |         elif name == "apply":
111 |             #import inspect
112 |             #print " "*(len(inspect.stack())-9), "matching", name, root[NAME], self.input[1], self.input[0][self.input[1]+1:self.input[1]+11]
113 |             if root[NAME] == "anything":
114 |                 return pop(self.input)
115 |             elif root[NAME] == "void":
116 |                 return
117 |             old_locals = self.locals
118 |             self.locals = {}
119 |             try:
120 |                 outputs = self.match(self.rules[root[NAME]][BODY])
121 |             finally:
122 |                 self.locals = old_locals
123 |             if root[NAME] == "escaped_char":
124 |                 chars = dict(["''", '""', "t\t", "n\n", "r\r", "b\b", "f\f", "\\\\"])
125 |                 return chars[outputs[-1]]
126 |             and_node = getattr(outputs, "name", None) == "And"
127 |             make_node = "!" in self.rules[root[NAME]][FLAGS] or\
128 |                         (and_node and len(outputs) > 1)
129 |             if not make_node:
130 |                 return outputs
131 |             return Node(root[NAME], to_list(outputs))
132 |         elif name in "bound":
133 |             if root[1].name == "inline":
134 |                 return Node(root[1][0], to_list(self.match(root[0])))
135 |             else: # bind
136 |                 self.locals[root[1][0]] = self.match(root[0])
137 |                 return
138 |         elif name == "negation":
139 |             try:
140 |                 self.match(root[0])
141 |             except MatchError:
142 |                 self.input = old_input
143 |                 return None
144 |             raise MatchError("Negation true")
145 |         elif name == "rule_value":
146 |             return self.eval(root[0])
147 |         elif name == "predicate":
148 |             output = self.eval(root[0])
149 |             if not output:
150 |                 raise MatchError("Predicate evaluates to false")
151 |             return None if output == True else Node("predicate", [output])
152 |         elif name == "action":
153 |             self.locals['self'] = self
154 |             exec(root[0], globals(), self.locals)
155 |             del self.locals['self']
156 |             return
157 |         elif name == "lookahead":
158 |             output = self.match(root[0])
159 |             self.input = old_input[:]
160 |             return output
161 |         else:
162 |             raise Exception("Unknown operator %s" % name)
163 |         return to_node(outputs)
164 | 
def reformat_atom(atom, trailers):
    """Fold the trailers onto `atom` from left to right.

    An `arglist` trailer gives a `__call__` node, a `NAME` trailer a
    `__getattr__` node and a `subscriptlist` trailer a `__getitem__` node;
    each wraps the result built so far.  Any other trailer raises `Exception`.
    """
    result = atom
    for trailer in to_list(trailers):
        kind = trailer.name
        if kind == "arglist":
            wrapped = Node("__call__", [result, trailer])
        elif kind == "NAME":
            wrapped = Node("__getattr__", [result, Node("NAME", trailer)])
        elif kind == "subscriptlist":
            wrapped = Node("__getitem__", [result] + trailer)
        else:
            raise Exception("Unknown trailer %s" % kind)
        result = wrapped
    return result
177 | 
# Binary operators grouped by precedence.  `reformat_binary` treats operators
# from later groups as binding tighter.
binary_ops = ((">=", "<=", "<>", "<", ">", "==", "!=",
               "in", "not in", "is not", "is"),
              ("|",), ("^",), ("&",), ("<<", ">>"), ("+", "-"),
              ("*", "/", "%", "//"), ("**",))
# Operator -> index of its group in `binary_ops`.
priority = {op:i for i, ops in enumerate(binary_ops) for op in ops}
# Every group except the first (the comparison operators).
expr_ops = binary_ops[1:]
184 | 
def reformat_binary(start, tokens):
    """Rebuild a flat operator/operand sequence into a precedence-ordered tree.

    `start` is the first operand and `tokens` the flat sequence
    `[op, operand, op, operand, ...]` that follows it.  Returns `start`
    unchanged when `tokens` is falsy, otherwise nested
    `Node("__binary__", [op, lhs, rhs])` nodes in which operators with a
    higher `priority` bind tighter and operators of equal priority associate
    to the left.
    """
    def parse(lhs, tokens, index=0):
        # Fold `(op, rhs)` pairs into `lhs` starting at `index`, stopping at
        # the first operator weaker than the one found at `index`.
        # Returns `(tree, index of the first pair not consumed)`.
        threshold = priority[tokens[index][0][0]]
        while index < len(tokens):
            op, rhs = tokens[index]
            # The operator string is the first item of the op entry.
            op = op[0]
            if priority[op] < threshold:
                break
            index += 1
            # A stronger operator to the right takes `rhs` as its own left
            # operand first.
            while index < len(tokens) and\
                  priority[tokens[index][0][0]] > priority[op]:
                rhs, index = parse(rhs, tokens, index)
            lhs = Node("__binary__", [op, lhs, rhs])
        return (lhs, index)
    if not tokens:
        return start
    # Pair each operator with the operand that follows it.
    # NOTE(review): `len(tokens)` below relies on `zip` returning a list, as
    # it does in Python 2.
    tokens = zip(tokens[::2], tokens[1::2])
    lhs, index = start, 0
    while index < len(tokens):
        lhs, index = parse(lhs, tokens, index)
    return lhs
206 | 
def any_token(input, binary=True):
    """Consume one operator from `input` and return it, or return False.

    `input` is a `[sequence, position]` pair.  Candidates come from
    `binary_ops`, or from `expr_ops` when `binary` is false.  On success the
    position is left just past the operator; otherwise `input` is restored
    and False is returned.
    """
    ops = binary_ops if binary else expr_ops
    old_input = input[:]
    # Try longer operators first so that "<<", ">>" and "//" are not shadowed
    # by their one-character prefixes "<", ">" and "/".  The sort is stable,
    # so operators of equal length keep their order from the table.
    candidates = [token for tokens in ops for token in tokens]
    candidates.sort(key=len, reverse=True)
    for token in candidates:
        try:
            if all(pop(input) == char for char in token):
                return token
        except MatchError:
            # Ran off the end of the input part-way through this operator;
            # a shorter one may still match.
            pass
        input[:] = old_input
    return False
216 | 
# Definition of the grammar language, written in the grammar language itself.
# Compared with the bootstrap `tree` at the end of this listing it adds
# `list`, `rule_value` (`->`), `predicate` (`?(...)`), `action` (`!(...)`),
# lookahead (`~~`) and binding (`:name`).
grammar = r"""
expr = apply | exactly | token | parenthesis | output | list
     | rule_value | predicate | action

exactly! = "'" {(escaped_char | ~'\'' anything)*} "'"
token! = "\"" {(escaped_char | ~'"' anything)*} "\""
apply! = indentation? {name ('(' {balanced=args} ')')?}
parenthesis = "(" {or} ")"
output! = "{" {or} "}"
list! = "[" {or} "]"
predicate! = "?(" {balanced} ')'
action! = "!(" {balanced} ')'
rule_value! = "->" hspaces {(escaped_char | ~'\n' anything)*}

not = "~" "~" {expr=lookahead} | "~" {expr=negation} | expr
quantified = not (('*' | '+' | '?')=quantifier)?
bound = ":" {name=bind}
      | quantified (':' {name=bind} | '=' {name=inline})?
and = bound*
or = and ("|" {and})*

rule = spaces {name=rule_name '!'?=flags and=args ("=" {or})}
grammar = {rule*} spaces

comment = '#' (~'\n' anything)*
indentation = (hspaces ('\r' '\n' | '\r' | '\n'))* hspacesp
name = (letter | '_') (letter | digit | '_')*
balanced = (escaped_char | '(' balanced ')' | ~')' anything)*
"""
246 | 
# Grammar for Python 2.x source, written in the grammar language.  The entry
# rules are `single_input`, `file_input` and `eval_input`.  The embedded
# Python expressions call `reformat_binary`, `reformat_atom` and `any_token`
# from this file and use `self.indentation` as a stack of indentation widths.
python_grammar = r"""
single_input = EMPTY_LINE | simple_stmt | (compound_stmt EMPTY_LINE)
file_input = (EMPTY_LINE | SAME_INDENT stmt)* ENDMARKER
eval_input = testlist NEWLINE? EMPTY_LINE* ENDMARKER

decorator! = "@" {dotted_name ("(" {arglist} ")")?} NEWLINE
decorators! = decorator+
decorated = decorators (classdef | funcdef)
funcdef = "def" {NAME} "(" {parameters | void=parameters} ")" ":" {suite}
# Check order validity elsewhere (at most one remaining_args and one kwargs)
parameters! = {fpdef_opt (comma {fpdef_opt})*} comma?

fpdef = NAME | "(" fplist ")"
fpdef_opt = fpdef ("=" {test})? | "*" {NAME=remaining_args} | "**" {NAME=kwargs}
fplist = {fpdef (comma {fpdef})*} comma?

stmt = compound_stmt | simple_stmt
simple_stmt = {small_stmt (";" {small_stmt})*} ";"? NEWLINE
small_stmt = print_stmt | del_stmt | pass_stmt | flow_stmt | comment
           | import_stmt | global_stmt | exec_stmt | assert_stmt | expr_stmt

expr_stmt = aug_assign | regular_assign | testlist
aug_assign_symbol = "+=" | "-=" | "*=" | "/=" | "%=" | "&="
                  | "|=" | "^=" | "<<=" | ">>=" | "**=" | "//="
aug_assign = testlist aug_assign_symbol=operation (yield_expr|testlist)
regular_assign = testlist ("=" {yield_expr|testlist})+
# For normal assignments, additional restrictions enforced by the interpreter
print_stmt! = "print" { {test ("," {test})*} ","?
                      | ">>" test ( ("," test)+ ","? )? | void}
del_stmt! = "del" hspacesp {exprlist}
pass_stmt! = "pass" {}
flow_stmt = break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt! = "break" {}
continue_stmt! = "continue" {}
return_stmt! = "return" {testlist?}
yield_stmt = yield_expr
raise_stmt! = "raise" {(test ("," test ("," test))?)?}
import_stmt = import_name | import_from
import_name = "import" {import_names}
import_names! = dotted_as_name ("," {dotted_as_name})*
import_from! = "from" {"."* dotted_name | "."+}
               "import" {"*" | "(" {import_as_names} ")" | import_as_names}
import_as_name = NAME ("as" {NAME})?
dotted_as_name = dotted_name ("as" {NAME})?
import_as_names! = {import_as_name ("," {import_as_name})*} ","?
dotted_name = NAME ("." {NAME})*
global_stmt = "global" NAME ("," NAME)*
exec_stmt! = "exec" {expr ("in" {test} ("," {test})?)?}
assert_stmt! = "assert" {test ("," test)?}

compound_stmt = if_stmt | while_stmt | for_stmt | try_stmt | with_stmt
              | funcdef | classdef | decorated
if_stmt = ("if" {test} ":" {suite})=single_if 
          (("elif" {test} ":" {suite})=single_if)*
          (("else" ":" {void=gen_true suite})=single_if)?
while_stmt = "while" {test} ":" {suite ("else" ":" {suite})?}
for_stmt = "for" {exprlist} "in" {testlist} ":" {suite} {{"else"} ":" {suite=elseblock}}?
try_stmt! = "try" ":" {suite}
            {(({exception} ":" {suite})=except_clause)+=except_clauses
             ("else" ":" suite)?
             ("finally" ":" suite)?
             | "finally" ":" suite}
with_stmt = "with" with_item ("," with_item)* ":" suite
with_item = test ("as" expr)?
exception! = "except" {(test (("as" | ",") {test})?)?}
suite = NEWLINE INDENT {(SAME_INDENT stmt | EMPTY_LINE)+} DEDENT | simple_stmt

testlist = {test ("," {test})*} ","?
yield_expr! = "yield" {testlist?}

test = lambdef | or_test ("if" {or_test} {("else" {test})?})?
or_test = and_test ("or" {and_test})*
and_test = not_test ("and" {not_test})*
not_test = ("not" {not_test})=not_test | comparison

comparison = factor:start (hspaces {?(any_token(self.input))}
                           hspaces {factor})*:oper_and_atoms
             -> reformat_binary(start, oper_and_atoms)
expr = factor:start (hspaces {?(any_token(self.input, binary=False))}
                     hspaces {factor})*:oper_and_atoms
     -> reformat_binary(start, oper_and_atoms)

factor = ("+"|"-"|"~")* power
power = trailed_atom ("**" factor)?
trailed_atom = atom:atom trailer*:trailers -> reformat_atom(atom, trailers)
atom = "(" spaces {parenthesis} spaces ")"
     | "[" spaces {listmaker | void=listmaker} spaces "]"
     | "{" spaces {dictmaker} spaces "}"
     | "{" {setmaker} spaces "}"
     | "`" {(stmt | small_stmt)=thunk} "`"
     | STRINGS | NAME | NUMBER
parenthesis = yield_expr | testlist_comp=generator | tuple 
            | test | void=no_param
listmaker! = (test list_for list_iter*)=listcomp
           | {test (comma {test})*} comma?
testlist_comp = test list_for list_iter*
tuple! = ({test} comma)+ test?
lambdef! = "lambda" {parameters? | void=parameters} ":" {test}
trailer = "(" spaces {arglist} spaces ")"
        | "[" spaces {subscriptlist} spaces "]"
        | "." {NAME}
subscriptlist! = subscript=subscript ("," subscript=subscript)* ","?
subscript = "..." | ({test?=start} ":" {test?=stop} {step?})=slice | test
exprlist = {expr ("," {expr})*} ","?
step! = ":" {test?}
dictmaker! = ({test} ":" {test} {list_for} {list_iter*})=dictcomp
           | {({test} ":" {test})=pair ((comma {test} ":" {test})=pair)*} comma?
           | void

setmaker! = test (list_for list_iter* | (("," test)* ","?))

classdef = "class" {NAME} {("(" {testlist?} ")")?=parents} ":" {suite}

arglist! = ({argument} comma)* ( "**" {test=kwargs}
                               | "*" {test=remaining_args ("," keyword_arg)* 
                                      ("," "**" {test=kwargs})?}
                               | {argument | void} )
                               comma?
comma = "," spaces

argument = keyword_arg | listcomp_arg
keyword_arg = {test} "=" {test}
listcomp_arg = test (list_for list_iter*)?

list_iter = list_for | list_if
list_for = spaces "for" {exprlist} "in" {or_test} # {testlist_safe}
list_if! = spaces "if" {or_test}

testlist_safe = or_test ((',' or_test)+ ','?)?
testlist1 = test ("," test)*

comment! = '#' {(~'\n' {anything})*}
NUMBER! = hspaces digit+:s -> int("".join(n[0] for n in s))
# Probably need to check that the result isn't a reserved word.
NAME! = hspaces {((letter | '_') (letter | digit | '_')*)}
STRINGS = STRING (spaces {STRING})*
STRING! = hspaces stype? '"' '"' '"' {(escaped_char | ~('"' '"' '"') {anything})*} '"' '"' '"'
       | hspaces stype? '\'' {(escaped_char | ~'\'' anything)*} '\''
       | hspaces stype? '"' {(escaped_char | ~'"' anything)*} '"'
stype! = 'r'|'b'
EMPTY_LINE = (hspaces comment? ('\n' | '\r'))=EMPTY_LINE
NEWLINE = hspaces (comment hspaces)? ('\n' | '\r')
SAME_INDENT = hspaces:s ?(self.indentation[-1] == (len(s) if s else 0))
ENDMARKER = ~anything
INDENT = ~~hspaces:s !(self.indentation.append(len(s) if s else 0))
DEDENT = !(self.indentation.pop())
"""
394 | 
# Low-level rules in the grammar language: escape sequences, the character
# classes `letter` and `digit`, horizontal whitespace (`hspaces` may be empty,
# `hspacesp` needs at least one character; both accept a backslash-newline),
# and general whitespace (`space` also accepts a `comment`).
extra = r"""
escaped_char! = '\\' {'n'|'r'|'t'|'b'|'f'|'"'|'\''|'\\'}
letter = 'a'|'b'|'c'|'d'|'e'|'f'|'g'|'h'|'i'|'j'|'k'|'l'|'m'|'n'|'o'|'p'|'q'|'r'|'s'|'t'|'u'|'v'|'w'|'x'|'y'|'z'|'A'|'B'|'C'|'D'|'E'|'F'|'G'|'H'|'I'|'J'|'K'|'L'|'M'|'N'|'O'|'P'|'Q'|'R'|'S'|'T'|'U'|'V'|'W'|'X'|'Y'|'Z'
digit = '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
hspaces = (' ' | '\t' | escaped_linebreak)*
hspacesp = (' ' | '\t' | escaped_linebreak)+
escaped_linebreak = '\\' {'\n'}
space = '\t'|'\n'|'\r'|' '|comment
spaces = space*
"""
405 | 
# Bootstrap grammar as nested lists of the form `[name, child, ...]`: one
# `rule` entry (rule_name, flags, args, body) for each rule of the basic
# grammar language.
# NOTE(review): presumably converted with `simple_wrap_tree` and handed to
# `Interpreter`; the code that uses it is outside this view.
tree = ['And',
 ['rule', ['rule_name', 'name'], ['flags'], ['args'],
  ['and',
   ['or', ['apply', 'letter'], ['exactly', '_']],
   ['quantified',
    ['or', ['apply', 'letter'], ['apply', 'digit'], ['exactly', '_']],
    ['quantifier', '*']]]],
 ['rule', ['rule_name', 'expr'], ['flags'], ['args'],
  ['or',
   ['apply', 'apply'], ['apply', 'exactly'], ['apply', 'token'],
   ['apply', 'parenthesis'], ['apply', 'output']]],
 ['rule', ['rule_name', 'exactly'], ['flags', '!'], ['args'],
  ['and',
   ['token', "'"],
   ['output',
    ['quantified',
     ['or', ['apply', 'escaped_char'],
            ['and', ['negation', ['exactly', "'"]], ['apply', 'anything']]],
     ['quantifier', '*']]],
   ['token', "'"]]],
 ['rule', ['rule_name', 'token'], ['flags', '!'], ['args'],
  ['and',
   ['token', '"'],
   ['output',
    ['quantified',
     ['or',
      ['apply', 'escaped_char'],
      ['and', ['negation', ['exactly', '"']], ['apply', 'anything']]],
     ['quantifier', '*']]],
   ['token', '"']]],
 ['rule', ['rule_name', 'escaped_char'], ['flags', '!'], ['args'], ['and',
   ['exactly', '\\'],
   ['output', ['or'] + [['exactly', s] for s in 'nrtbf"\'\\']]]],
 ['rule', ['rule_name', 'apply'], ['flags', '!'], ['args'],
  ['and', ['quantified', ['or', ['exactly', '\t'], ['exactly', ' ']],
                         ['quantifier', '*']],
          ['output', ['apply', 'name']]]],
 ['rule', ['rule_name', 'parenthesis'], ['flags'], ['args'],
  ['and', ['token', '('], ['output', ['apply', 'or']], ['token', ')']]],
 ['rule', ['rule_name', 'output'], ['flags', '!'], ['args'],
  ['and', ['token', '{'], ['output', ['apply', 'or']], ['token', '}']]],
 ['rule', ['rule_name', 'not'], ['flags'], ['args'], ['or',
   ['and',
    ['token', '~'],
    ['output', ['bound', ['apply', 'expr'], ['inline', 'negation']]]],
   ['apply', 'expr']]],
 ['rule', ['rule_name', 'quantified'], ['flags'], ['args'],
  ['and',
   ['apply', 'not'],
   ['quantified',
    ['bound',
     ['or', ['exactly', '*'], ['exactly', '+'], ['exactly', '?']],
     ['inline', 'quantifier']],
    ['quantifier', '?']]]],
 ['rule', ['rule_name', 'bound'], ['flags'], ['args'],
  ['and',
   ['apply', 'quantified'],
   ['quantified',
    ['and',
     ['exactly', '='],
     ['output', ['bound', ['apply', 'name'], ['inline', 'inline']]]],
    ['quantifier', '?']]]],
 ['rule', ['rule_name', 'and'], ['flags'], ['args'],
  ['quantified', ['apply', 'bound'], ['quantifier', '*']]],
 ['rule', ['rule_name', 'or'], ['flags'], ['args'], ['and',
   ['apply', 'and'],
   ['quantified',
    ['and', ['token', '|'], ['output', ['apply', 'and']]],
    ['quantifier', '*']]]],
 ['rule', ['rule_name', 'rule'], ['flags'], ['args'],
  ['and',
   ['apply', 'spaces'],
   ['output',
    ['and',
     ['bound', ['apply', 'name'], ['inline', 'rule_name']],
     ['bound',
      ['quantified', ['exactly', '!'], ['quantifier', '?']],
      ['inline', 'flags']],
     ['bound', ['apply', 'and'], ['inline', 'args']],
     ['and', ['token', '='], ['output', ['apply', 'or']]]]]]],
 ['rule', ['rule_name', 'grammar'], ['flags'], ['args'],
  ['and',
   ['output', ['quantified', ['apply', 'rule'], ['quantifier', '*']]],
   ['apply', 'spaces']]],
 ['rule', ['rule_name', 'letter'], ['flags'], ['args'],
  ['or'] + [['exactly', s]
            for s in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ']],
 ['rule', ['rule_name', 'digit'], ['flags'], ['args'],
  ['or'] + [['exactly', s] for s in '0123456789']],
 ['rule', ['rule_name', 'space'], ['flags'], ['args'],
  ['or'] + [['exactly', s] for s in '\t\n\r ']],
 ['rule', ['rule_name', 'spaces'], ['flags'], ['args'],
  ['quantified', ['apply', 'space'], ['quantifier', '*']]]]
499 | 
500 | if __name__ == "__main__":
501 |     import sys
502 |     i1 = Interpreter(simple_wrap_tree(tree))
503 |     match_tree1 = i1.match(i1.rules['grammar'][-1], grammar + extra)
504 |     i2 = Interpreter(match_tree1)
505 |     match_tree2 = i2.match(i2.rules['grammar'][-1], python_grammar + extra)
506 |     pyi = Interpreter(match_tree2, whitespace="\t \\")
507 |     ast = pyi.match(pyi.rules['file_input'][-1], open(sys.argv[-1]).read())
508 |     ast.pprint()
509 | 


--------------------------------------------------------------------------------
/test/boot_test.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | sys.path.append('.')
 3 | sys.setrecursionlimit(5000)
 4 | from pymetaterp.util import simple_wrap_tree
 5 | from pymetaterp import boot_tree, boot_stackless as boot, boot_grammar
 6 | 
 7 | grammar = boot_grammar.bootstrap + boot_grammar.extra  # grammar source text: bootstrap plus extra
 8 | i1 = boot.Interpreter(simple_wrap_tree(boot_tree.tree))  # stage 1: interpreter built from the wrapped boot_tree.tree
 9 | match_tree = i1.match(i1.rules['grammar'][-1], grammar)  # match the grammar text with stage 1's own 'grammar' rule
10 | i2 = boot.Interpreter(match_tree)  # stage 2: interpreter built from the stage 1 match result
11 | match_tree2 = i2.match(i2.rules['grammar'][-1], grammar)  # match the same text again with stage 2
12 | i3 = boot.Interpreter(match_tree2)  # stage 3: interpreter built from the stage 2 match result
13 | for i in range(3):  # three more rounds: match the text, then rebuild the interpreter from the result
14 |     match_tree3 = i3.match(i3.rules['grammar'][-1], grammar)
15 |     i3 = boot.Interpreter(match_tree3)
16 | grammar += boot_grammar.diff  # extend the grammar text with boot_grammar.diff
17 | match_tree3 = i3.match(i3.rules['grammar'][-1], grammar)  # result is not inspected below; only exercises the match
18 | print match_tree == match_tree2  # prints whether the stage 1 and stage 2 results are equal; later results are not compared
19 | 


--------------------------------------------------------------------------------
/test/compiled_python_test.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | sys.path.append('.')
 3 | sys.setrecursionlimit(5000)
 4 | from pymetaterp.util import simple_wrap_tree
 5 | from pymetaterp import boot_tree, boot_grammar
 6 | from pymetaterp.boot_compiled import to_python, match
 7 | from pymetaterp import python_compiled, python_grammar
 8 | import os
 9 | 
10 | grammar = boot_grammar.bootstrap + boot_grammar.extra
11 | t1 = list(simple_wrap_tree(boot_tree.tree))
12 | t2 = match(t1, grammar)
13 | t3 = match(t2, grammar + boot_grammar.diff)
14 | pytree = match(t3, python_grammar.full_definition + python_grammar.extra)
15 | srctree = python_compiled.match(pytree, open(os.path.join("test", "python_ex.py")).read())
16 | srctree.pprint()
17 | 


--------------------------------------------------------------------------------
/test/compiled_test.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | sys.path.append('.')
 3 | sys.setrecursionlimit(5000)
 4 | from pymetaterp.util import simple_wrap_tree
 5 | from pymetaterp import boot_tree, boot_grammar
 6 | from pymetaterp.boot_compiled import to_python, match
 7 | 
 8 | t1 = list(simple_wrap_tree(boot_tree.tree))  # stage 1 tree: the wrapped boot_tree.tree, materialised as a list
 9 | grammar = boot_grammar.bootstrap + boot_grammar.extra  # grammar source text: bootstrap plus extra
10 | t2 = match(t1, grammar)  # stage 2: match the grammar text using the stage 1 tree
11 | t3 = match(t2, grammar)  # stage 3: match the same text using the stage 2 result
12 | assert(to_python(t2) == to_python(t3))  # fails unless both stages give equal to_python output
13 | 


--------------------------------------------------------------------------------
/test/python_parse_test.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | sys.path.append('.')
 3 | sys.setrecursionlimit(5000)
 4 | from pymetaterp.util import simple_wrap_tree
 5 | from pymetaterp import boot_grammar, boot_tree, boot_stackless as boot_terp, python, python_grammar
 6 | 
 7 | grammar = boot_grammar.bootstrap + boot_grammar.extra
 8 | i1 = boot_terp.Interpreter(simple_wrap_tree(boot_tree.tree))
 9 | # Not needed, just double checking: the interpreter built from boot_tree matches the boot grammar text here, and i2 below is built from that result
10 | match_tree = i1.match(i1.rules['grammar'][-1], grammar)
11 | i2 = boot_terp.Interpreter(match_tree)
12 | match_tree2 = i2.match(i2.rules['grammar'][-1], grammar + boot_grammar.diff)
13 | i3 = boot_terp.Interpreter(match_tree2)
14 | match_tree3 = i3.match(i3.rules['grammar'][-1], python_grammar.full_definition + python_grammar.extra)
15 | pyi = python.Interpreter(match_tree3)
16 | pyimatch_tree = pyi.match(pyi.rules['grammar'][-1], open("test/python_parse_test.py").read())
17 | pyimatch_tree.pprint()
18 | print len(pyi.input[0]) == pyi.input[1] + 1
19 | 


--------------------------------------------------------------------------------