├── .gitignore ├── LICENSE ├── README.md ├── article-code ├── README.md ├── greet.py ├── handaxeweb.py ├── tailbiter0.py ├── tailbiter1.py ├── tailbiter1_py35.py ├── tailbiter2.py ├── tailbiter2_py35.py └── tailbiter2_py36.py ├── byterun ├── __init__.py ├── __main__.py ├── execfile.py └── interpreter.py ├── check_subset.py ├── compiler.py ├── grammar ├── README.md ├── __init__.py ├── metagrammar.py ├── parsiflage.py ├── parson3.py ├── subset ├── t ├── test-parse └── test-update ├── meta_via_parsiflage.py ├── metameta.py └── tests ├── __init__.py ├── test_basic.py ├── test_exceptions.py ├── test_functions.py └── vmtest.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | # PyBuilder 59 | target/ 60 | 61 | #Ipython Notebook 62 | .ipynb_checkpoints 63 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Darius Bacon 2 | Byterun Copyright (c) 2013, Ned Batchelder 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | this software and associated documentation files (the "Software"), to deal in 6 | the Software without restriction, including without limitation the rights to 7 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 8 | of the Software, and to permit persons to whom the Software is furnished to do 9 | so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in all 12 | copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 | SOFTWARE. 
21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | A compiler from a subset of Python 3.4 (starting with abstract syntax 2 | trees in Python's `ast` form) to CPython 3.4 bytecode. The compiler is 3 | coded in that same Python subset; it can compile itself. 4 | 5 | It can optionally run on top of a port of 6 | [byterun](https://github.com/nedbat/byterun) to Python 3.4. (The 7 | original Byterun runs in 2.7 or 3.3.) 8 | 9 | I've greatly stripped down and modified the version of byterun in this 10 | repo, and extended the compiler a bit, to run both together, i.e. the 11 | compiler-compiled compiler and interpreter on the interpreter. 12 | 13 | This is a continuation of 14 | https://github.com/darius/500lines/tree/master/bytecode-compiler 15 | 16 | See 17 | [article-code](https://github.com/darius/tailbiter/tree/master/article-code) 18 | for the version [published in Code Words](https://codewords.recurse.com/issues/seven/dragon-taming-with-tailbiter-a-bytecode-compiler). 19 | Also, for tweaks to run in Python 3.5 and 3.6. 20 | 21 | If you run a later CPython version, **don't expect this to work there**. I haven't tried it. 22 | -------------------------------------------------------------------------------- /article-code/README.md: -------------------------------------------------------------------------------- 1 | Here are source files extracted from my [Code Words](https://codewords.recurse.com/issues/seven) 2 | article on compiling 3 | Python. There are three versions of the compiler, each handling a 4 | larger subset of Python. To run them, you need Python 3.4. You 5 | also need `check_subset.py` from the parent directory; copy it down 6 | here. Then this should work: 7 | 8 | $ python3 tailbiter0.py greet.py 9 | Hi, Chrysophylax 10 | 11 | and likewise for the article's other example runs. 
12 | 13 | If you have Python 3.5 instead, then use `tailbiter1_py35.py` and 14 | `tailbiter2_py35.py` in place of `tailbiter1.py` and `tailbiter2.py`. 15 | You can see what changes were needed with 16 | 17 | $ diff -u tailbiter1.py tailbiter1_py35.py 18 | 19 | and similarly for `tailbiter2`: the main change was to how dict 20 | literals get compiled. (The `BUILD_MAP` operation changes to expect 21 | all of the keys and values on the stack before it creates the dict.) 22 | 23 | For Python 3.6 there's `tailbiter2_py36.py`. Currently it can't 24 | compile itself, because it needs to generate a jump with a 25 | more-than-one-byte offset. 26 | 27 | After `tailbiter2` I added a few more features to be able to compile 28 | `byterun`; the result is `../compiler.py`. (It's for Python 3.4 only, 29 | for now, because those particular features changed in Python 3.5.) 30 | 31 | `handaxeweb.py` exists to extract the tailbiter source code from the article's 32 | Markdown source. Since the article is distributed as HTML, this is 33 | useless to you, but perhaps someone might adapt it for their own 34 | literate programming, so here you are. This is derived from an earlier 35 | version in Python of Kragen Sitaker's 36 | [handaxeweb](https://github.com/kragen/peg-bootstrap/blob/master/handaxeweb.md). 37 | -------------------------------------------------------------------------------- /article-code/greet.py: -------------------------------------------------------------------------------- 1 | name = 'Chrysophylax' 2 | print('Hi,', name) 3 | -------------------------------------------------------------------------------- /article-code/handaxeweb.py: -------------------------------------------------------------------------------- 1 | """ 2 | Original by Kragen Sitaker. 
import re
import sys

# First line of a fenced block that names the chunk it feeds: "# in NAME:".
new_chunk_pattern = re.compile(r'# in (.*?):\s*$')
# A whole line of the form "<<NAME>>"; group 1 captures its indentation.
chunk_ref_pattern = re.compile(r'(?m)^([ \t]*)<<(.*)>>[ \t]*\n')


def main(argv, infile):
    """Command-line driver.

    argv[1:] selects the mode:
      ['--list']        print the root chunk names, sorted, one per line
      []                write the expansion of chunk '*' to stdout
      [NAME]            write version 0 of NAME to the file '0/NAME'
      [NAME, VERSION]   write that version of NAME to 'VERSION/NAME'

    infile is any iterable of text lines (e.g. sys.stdin).
    """
    chunks = parse(infile)
    if argv[1:] == ['--list']:
        for name in sorted(find_roots(chunks)):
            # Function-call form: valid on Python 3, and with a single
            # argument it prints the same thing on Python 2.
            print(name)
    elif not argv[1:]:
        # expand() needs a version; 0 matches the default used below.
        sys.stdout.write(expand(chunks, '*', 0))
    else:
        name, version = (argv[1], '0') if len(argv) == 2 else argv[1:]
        v = int(version)
        filename = '%d/%s' % (v, name)
        with open(filename, 'w') as outfile:
            outfile.write(expand(chunks, name, v))


def parse(infile):
    """Make a chunks table (name -> concatenated text) from :infile.

    Only lines inside ``` fences are collected.  A fence whose first line
    matches new_chunk_pattern switches the current chunk name; otherwise the
    text is appended to whichever chunk was current (initially '*').  A fence
    still open at end of input is treated as closed there.
    """
    chunk_name, chunks = '*', {}
    lines = iter(infile)
    for line in lines:
        if not line.startswith("```"):
            continue
        # next(..., None) so that running out of input ends the chunk
        # instead of leaking StopIteration to the caller.
        line = next(lines, None)
        if line is not None:
            new_chunk = new_chunk_pattern.match(line)
            if new_chunk:
                chunk_name = new_chunk.group(1)
                line = next(lines, None)
        while line is not None and line.rstrip() != "```":
            chunks[chunk_name] = chunks.get(chunk_name, '') + line
            line = next(lines, None)
    return chunks


def find_roots(chunks):
    "Return a set of the chunk_names that aren't referenced in chunks."
    chunk_refs = {name
                  for s in chunks.values()
                  for indent, name in chunk_ref_pattern.findall(s)}
    return set(chunks.keys()) - chunk_refs


def expand(chunks, name, version=0):
    """Return the named chunk with any chunk-references recursively expanded.

    The text is assembled from every 'NAME vK+' chunk with K <= version (in
    increasing K), followed by the newest 'NAME vK' chunk with K <= version
    that is more recent than the last 'vK+' chunk found.  When none of those
    exist the plain 'NAME' chunk is used; a KeyError is raised if that is
    missing too.  :version defaults to 0, the same default main() applies.
    """
    template, latest = '', -1
    for v in range(version+1):
        t = chunks.get(name + ' v%d+' % v, '')
        if t:
            latest = v
            template += t
    for v in range(version, latest, -1):
        t = chunks.get(name + ' v%d' % v, '')
        if t:
            template += t
            break
    if not template:
        template = chunks[name]
    # Each "<<ref>>" line is replaced by the referenced chunk, re-indented
    # to the column at which the reference appeared.
    return chunk_ref_pattern.sub(
        lambda mo: indent(mo.group(1), expand(chunks, mo.group(2), version)),
        template)


def indent(dent, text):
    "Return :text with :dent prepended to every line."
    return re.sub(r'(?m)^(?=.)', dent, text) # (?=.) to exclude \Z


if __name__ == '__main__':
    main(sys.argv, sys.stdin)
# Code generator for the smallest compiler version: an ast.NodeVisitor whose
# visit_* methods each return the assembly for one kind of AST node.  Calling
# the instance itself (see __call__) is the normal way to compile a node or
# a list of nodes.  Node types with no visit_* method are rejected.
class CodeGen(ast.NodeVisitor):

    def __init__(self, filename, scope):
        self.filename = filename
        self.scope = scope
        # Index tables built by make_table(); make_code() flattens them
        # into the tuples stored in the code object.
        self.constants = make_table()
        self.names = make_table()
        self.varnames = make_table()
    # Compile a Module node `t` into a code object called `name`.
    # The module body is followed by an implicit `return None`.
    def compile_module(self, t, name):
        assembly = self(t.body) + self.load_const(None) + op.RETURN_VALUE
        return self.make_code(assembly, name, 0)

    # Wrap finished assembly in a types.CodeType.
    def make_code(self, assembly, name, argcount):
        kwonlyargcount = 0
        nlocals = len(self.varnames)
        stacksize = plumb_depths(assembly)
        # CPython code-object flag bits: 0x02 CO_NEWLOCALS, 0x10 CO_NESTED,
        # 0x40 CO_NOFREE.
        flags = (  (0x02 if nlocals else 0)
                 | (0x10 if self.scope.freevars else 0)
                 | (0x40 if not self.scope.derefvars else 0))
        firstlineno, lnotab = make_lnotab(assembly)
        # Positional arguments follow the CodeType constructor of the
        # CPython version the README targets (3.4).
        # NOTE(review): later CPython versions changed this signature.
        return types.CodeType(argcount, kwonlyargcount,
                              nlocals, stacksize, flags, assemble(assembly),
                              self.collect_constants(),
                              collect(self.names), collect(self.varnames),
                              self.filename, name, firstlineno, lnotab,
                              self.scope.freevars, self.scope.cellvars)
    # Compile `t`, which is either a list of nodes (compiled in order and
    # concatenated) or a single node.  A node that has a `lineno` attribute
    # gets a SetLineNo marker placed in front of its assembly.
    def __call__(self, t):
        if isinstance(t, list): return concat(map(self, t))
        assembly = self.visit(t)
        return SetLineNo(t.lineno) + assembly if hasattr(t, 'lineno') else assembly
    # Reached for any node type lacking a visit_* method: unsupported syntax.
    def generic_visit(self, t):
        raise NotImplementedError()
    # Emit LOAD_CONST for `constant`.  The table key includes the type, so
    # constants that compare equal but differ in type (e.g. 1 and 1.0) get
    # separate slots.
    def load_const(self, constant):
        return op.LOAD_CONST(self.constants[constant, type(constant)])

    # The constants in index order, with the type half of each key dropped.
    def collect_constants(self):
        return tuple([constant for constant,_ in collect(self.constants)])
    def visit_NameConstant(self, t): return self.load_const(t.value) # for None/True/False
    def visit_Num(self, t): return self.load_const(t.n)
    def visit_Str(self, t): return self.load_const(t.s)
    visit_Bytes = visit_Str
    # A bare name is either read or written; any other context (e.g. Del)
    # trips the assertion.
    def visit_Name(self, t):
        if isinstance(t.ctx, ast.Load): return self.load(t.id)
        elif isinstance(t.ctx, ast.Store): return self.store(t.id)
        else: assert False

    # Every variable access in this version goes through the names table
    # (LOAD_NAME / STORE_NAME).
    def load(self, name): return op.LOAD_NAME(self.names[name])
    def store(self, name): return op.STORE_NAME(self.names[name])
    # Push the callee, the positional arguments, then name/value pairs for
    # the keyword arguments.  The CALL_FUNCTION argument packs the positional
    # count in the low byte and the keyword count in the next byte, hence
    # the < 256 limits.
    def visit_Call(self, t):
        assert len(t.args) < 256 and len(t.keywords) < 256
        return (self(t.func) + self(t.args) + self(t.keywords)
                + op.CALL_FUNCTION((len(t.keywords) << 8) | len(t.args)))

    # One keyword argument: its name as a constant, then its value.
    def visit_keyword(self, t):
        return self.load_const(t.arg) + self(t.value)
    # Expression statement: evaluate and discard the result.
    def visit_Expr(self, t):
        return self(t.value) + op.POP_TOP
    # Evaluate the value once, then store into each target.  Every target
    # but the last is preceded by a DUP_TOP so the value survives for the
    # next store (reduce() with a single target yields just that store).
    def visit_Assign(self, t):
        def compose(left, right): return op.DUP_TOP + left + right
        return self(t.value) + reduce(compose, map(self, t.targets))
# Join a sequence of assemblies into one.  sum() relies on Assembly.__add__,
# starting from the empty assembly `no_op`.
def concat(assemblies):
    return sum(assemblies, no_op)
# Base class for a piece of assembly code.  An instance of the base class
# itself is the empty assembly; subclasses override whichever of the
# passes below they take part in.
class Assembly:
    # `a + b` sequences two assemblies.
    def __add__(self, other):
        return Chain(self, other)
    # Size in bytes of the encoded form.
    length = 0
    # Return (label, address) pairs for the labels inside this assembly,
    # given that it begins at byte address `start`.
    def resolve(self, start):
        return ()
    # Return the encoded bytes, given the starting address and a mapping
    # from labels to their addresses.
    def encode(self, start, addresses):
        return b''
    # Return (address, line number) pairs for the line markers inside.
    def line_nos(self, start):
        return ()
    # Append to `depths` the stack depth reached after each step of this
    # assembly; depths[-1] is the depth on entry.
    def plumb(self, depths):
        pass

no_op = Assembly()
# A jump target.  It occupies no bytes; resolve() reports the address at
# which it ended up.
class Label(Assembly):
    def resolve(self, start):
        return ((self, start),)
# Marks the code that follows as coming from source line `line`.
class SetLineNo(Assembly):
    def __init__(self, line):
        self.line = line
    def line_nos(self, start):
        return ((start, self.line),)
# A single bytecode instruction: one byte when it takes no argument,
# otherwise opcode plus a two-byte argument.
class Instruction(Assembly):
    def __init__(self, opcode, arg):
        self.opcode = opcode
        self.arg = arg
        self.length = 1 if arg is None else 3
    def encode(self, start, addresses):
        # For jump opcodes self.arg is a Label: absolute jumps take the
        # label's address, relative jumps the distance from the end of this
        # 3-byte instruction.
        if self.opcode in dis.hasjabs: arg = addresses[self.arg]
        elif self.opcode in dis.hasjrel: arg = addresses[self.arg] - (start+3)
        else: arg = self.arg
        if arg is None: return bytes([self.opcode])
        # Argument is written low byte first.
        else: return bytes([self.opcode, arg % 256, arg // 256])
    def plumb(self, depths):
        # A Label is not a usable argument for dis.stack_effect, so 0 stands
        # in for it.
        arg = 0 if isinstance(self.arg, Label) else self.arg
        depths.append(depths[-1] + dis.stack_effect(self.opcode, arg))
# Emits no bytes; only lowers the tracked stack depth by one.  Used where
# the straight-line depth bookkeeping in plumb() would otherwise count a
# value that is not on the stack along the path that follows.
class OffsetStack(Assembly):
    def plumb(self, depths):
        depths.append(depths[-1] - 1)
# Code generator: an ast.NodeVisitor whose visit_* methods each return the
# assembly for one kind of AST node.  Calling the instance itself (see
# __call__) is the normal way to compile a node or a list of nodes.  Node
# types with no visit_* method are rejected.
class CodeGen(ast.NodeVisitor):

    def __init__(self, filename, scope):
        self.filename = filename
        self.scope = scope
        # Index tables built by make_table(); make_code() flattens them
        # into the tuples stored in the code object.
        self.constants = make_table()
        self.names = make_table()
        self.varnames = make_table()
    # Compile a Module node `t` into a code object called `name`.
    # The module body is followed by an implicit `return None`.
    def compile_module(self, t, name):
        assembly = self(t.body) + self.load_const(None) + op.RETURN_VALUE
        return self.make_code(assembly, name, 0)

    # Wrap finished assembly in a types.CodeType.
    def make_code(self, assembly, name, argcount):
        kwonlyargcount = 0
        nlocals = len(self.varnames)
        stacksize = plumb_depths(assembly)
        # CPython code-object flag bits: 0x02 CO_NEWLOCALS, 0x10 CO_NESTED,
        # 0x40 CO_NOFREE.
        flags = (  (0x02 if nlocals else 0)
                 | (0x10 if self.scope.freevars else 0)
                 | (0x40 if not self.scope.derefvars else 0))
        firstlineno, lnotab = make_lnotab(assembly)
        # Positional arguments follow the CodeType constructor of the
        # CPython version the README targets (3.4).
        # NOTE(review): later CPython versions changed this signature.
        return types.CodeType(argcount, kwonlyargcount,
                              nlocals, stacksize, flags, assemble(assembly),
                              self.collect_constants(),
                              collect(self.names), collect(self.varnames),
                              self.filename, name, firstlineno, lnotab,
                              self.scope.freevars, self.scope.cellvars)
    # Compile `t`, which is either a list of nodes (compiled in order and
    # concatenated) or a single node.  A node that has a `lineno` attribute
    # gets a SetLineNo marker placed in front of its assembly.
    def __call__(self, t):
        if isinstance(t, list): return concat(map(self, t))
        assembly = self.visit(t)
        return SetLineNo(t.lineno) + assembly if hasattr(t, 'lineno') else assembly
    # Reached for any node type lacking a visit_* method: unsupported syntax.
    def generic_visit(self, t):
        raise NotImplementedError()
    # Emit LOAD_CONST for `constant`.  The table key includes the type, so
    # constants that compare equal but differ in type (e.g. 1 and 1.0) get
    # separate slots.
    def load_const(self, constant):
        return op.LOAD_CONST(self.constants[constant, type(constant)])

    # The constants in index order, with the type half of each key dropped.
    def collect_constants(self):
        return tuple([constant for constant,_ in collect(self.constants)])
    def visit_NameConstant(self, t): return self.load_const(t.value) # for None/True/False
    def visit_Num(self, t): return self.load_const(t.n)
    def visit_Str(self, t): return self.load_const(t.s)
    visit_Bytes = visit_Str
    # A bare name is either read or written; any other context (e.g. Del)
    # trips the assertion.
    def visit_Name(self, t):
        if isinstance(t.ctx, ast.Load): return self.load(t.id)
        elif isinstance(t.ctx, ast.Store): return self.store(t.id)
        else: assert False

    # Every variable access in this version goes through the names table
    # (LOAD_NAME / STORE_NAME).
    def load(self, name): return op.LOAD_NAME(self.names[name])
    def store(self, name): return op.STORE_NAME(self.names[name])
    # Push the callee, the positional arguments, then name/value pairs for
    # the keyword arguments.  The CALL_FUNCTION argument packs the positional
    # count in the low byte and the keyword count in the next byte, hence
    # the < 256 limits.
    def visit_Call(self, t):
        assert len(t.args) < 256 and len(t.keywords) < 256
        return (self(t.func) + self(t.args) + self(t.keywords)
                + op.CALL_FUNCTION((len(t.keywords) << 8) | len(t.args)))

    # One keyword argument: its name as a constant, then its value.
    def visit_keyword(self, t):
        return self.load_const(t.arg) + self(t.value)
    # Expression statement: evaluate and discard the result.
    def visit_Expr(self, t):
        return self(t.value) + op.POP_TOP
    # Evaluate the value once, then store into each target.  Every target
    # but the last is preceded by a DUP_TOP so the value survives for the
    # next store (reduce() with a single target yields just that store).
    def visit_Assign(self, t):
        def compose(left, right): return op.DUP_TOP + left + right
        return self(t.value) + reduce(compose, map(self, t.targets))
    # if/else statement: test, skip to `orelse` when false; after the body,
    # jump over the else part to `after`.
    def visit_If(self, t):
        orelse, after = Label(), Label()
        return (           self(t.test) + op.POP_JUMP_IF_FALSE(orelse)
                         + self(t.body) + op.JUMP_FORWARD(after)
                + orelse + self(t.orelse)
                + after)
    # Conditional expression.  Same shape as visit_If, but each branch leaves
    # a value on the stack.  Only one branch runs, so OffsetStack takes the
    # body's value back out of the depth count before the else branch is
    # measured.
    def visit_IfExp(self, t):
        orelse, after = Label(), Label()
        return (           self(t.test) + op.POP_JUMP_IF_FALSE(orelse)
                         + self(t.body) + op.JUMP_FORWARD(after)
                + OffsetStack()
                + orelse + self(t.orelse)
                + after)
    # Dict display: BUILD_MAP with the item count (capped at 0xFFFF) as its
    # argument, then value, key, STORE_MAP for each item.
    def visit_Dict(self, t):
        return (op.BUILD_MAP(min(0xFFFF, len(t.keys)))
                + concat([self(v) + self(k) + op.STORE_MAP
                          for k, v in zip(t.keys, t.values)]))
    # Subscript load or store.  Reads t.slice.value, so only a slice node
    # with a `value` attribute (a plain index) is handled.
    def visit_Subscript(self, t):
        return self(t.value) + self(t.slice.value) + self.subscr_ops[type(t.ctx)]
    subscr_ops = {ast.Load: op.BINARY_SUBSCR, ast.Store: op.STORE_SUBSCR}

    # Attribute load or store; the attribute name goes in the names table.
    def visit_Attribute(self, t):
        sub_op = self.attr_ops[type(t.ctx)]
        return self(t.value) + sub_op(self.names[t.attr])
    attr_ops = {ast.Load: op.LOAD_ATTR, ast.Store: op.STORE_ATTR}
    def visit_List(self, t): return self.visit_sequence(t, op.BUILD_LIST)
    def visit_Tuple(self, t): return self.visit_sequence(t, op.BUILD_TUPLE)

    # In Load context, build the sequence from its elements.  In Store
    # context (unpacking assignment), split the value on the stack with
    # UNPACK_SEQUENCE and let each element store its piece.
    def visit_sequence(self, t, build_op):
        if isinstance(t.ctx, ast.Load):
            return self(t.elts) + build_op(len(t.elts))
        elif isinstance(t.ctx, ast.Store):
            return op.UNPACK_SEQUENCE(len(t.elts)) + self(t.elts)
        else:
            assert False
    # Unary operator: operand, then the instruction looked up by operator type.
    def visit_UnaryOp(self, t):
        return self(t.operand) + self.ops1[type(t.op)]
    ops1 = {ast.UAdd: op.UNARY_POSITIVE, ast.Invert: op.UNARY_INVERT,
            ast.USub: op.UNARY_NEGATIVE, ast.Not: op.UNARY_NOT}
    # Binary operator: left, right, then the instruction for the operator.
    # An operator missing from ops2 raises KeyError.
    def visit_BinOp(self, t):
        return self(t.left) + self(t.right) + self.ops2[type(t.op)]
    ops2 = {ast.Pow: op.BINARY_POWER, ast.Add: op.BINARY_ADD,
            ast.LShift: op.BINARY_LSHIFT, ast.Sub: op.BINARY_SUBTRACT,
            ast.RShift: op.BINARY_RSHIFT, ast.Mult: op.BINARY_MULTIPLY,
            ast.BitOr: op.BINARY_OR, ast.Mod: op.BINARY_MODULO,
            ast.BitAnd: op.BINARY_AND, ast.Div: op.BINARY_TRUE_DIVIDE,
            ast.BitXor: op.BINARY_XOR, ast.FloorDiv: op.BINARY_FLOOR_DIVIDE}
    # Comparison with exactly one operator; a chained comparison such as
    # a < b < c fails the list-unpacking on the first line.  COMPARE_OP's
    # argument is the operator's position in dis.cmp_op.
    def visit_Compare(self, t):
        [operator], [right] = t.ops, t.comparators
        cmp_index = dis.cmp_op.index(self.ops_cmp[type(operator)])
        return self(t.left) + self(right) + op.COMPARE_OP(cmp_index)
    ops_cmp = {ast.Eq: '==', ast.NotEq: '!=', ast.Is: 'is', ast.IsNot: 'is not',
               ast.Lt: '<', ast.LtE: '<=', ast.In: 'in', ast.NotIn: 'not in',
               ast.Gt: '>', ast.GtE: '>='}
    # `and` / `or`: after each operand but the last, a JUMP_IF_..._OR_POP
    # either jumps past the rest keeping the value or pops it and falls
    # through.  OffsetStack records that pop for the fall-through path.
    def visit_BoolOp(self, t):
        op_jump = self.ops_bool[type(t.op)]
        def compose(left, right):
            after = Label()
            return left + op_jump(after) + OffsetStack() + right + after
        return reduce(compose, map(self, t.values))
    ops_bool = {ast.And: op.JUMP_IF_FALSE_OR_POP,
                ast.Or: op.JUMP_IF_TRUE_OR_POP}
    def visit_Pass(self, t):
        return no_op

    # `raise exc` with exactly one operand.
    # NOTE(review): a bare `raise` (t.exc is None) and `raise ... from ...`
    # do not appear to be handled here.
    def visit_Raise(self, t):
        return self(t.exc) + op.RAISE_VARARGS(1)
    # `import a.b [as c]`: import at level 0 with no fromlist, then bind
    # either the `as` name or the first dotted component.
    # NOTE(review): with `import a.b as c` this looks like it binds c to
    # whatever IMPORT_NAME leaves on the stack (the top-level package), not
    # the submodule; confirm whether that form is meant to be supported.
    def visit_Import(self, t):
        return concat([self.import_name(0, None, alias.name)
                       + self.store(alias.asname or alias.name.split('.')[0])
                       for alias in t.names])

    # `from m import x [as y], ...`: import the module once, pull each name
    # off it with IMPORT_FROM and store it, then pop the module.
    def visit_ImportFrom(self, t):
        fromlist = tuple([alias.name for alias in t.names])
        return (self.import_name(t.level, fromlist, t.module)
                + concat([op.IMPORT_FROM(self.names[alias.name])
                          + self.store(alias.asname or alias.name)
                          for alias in t.names])
                + op.POP_TOP)

    # Shared prologue of both import forms: level, fromlist, IMPORT_NAME.
    def import_name(self, level, fromlist, name):
        return (self.load_const(level)
                + self.load_const(fromlist)
                + op.IMPORT_NAME(self.names[name]))
    # while loop: re-test at `loop`, leave to `end` when the test is false.
    # t.orelse is not compiled.
    def visit_While(self, t):
        loop, end = Label(), Label()
        return (  loop + self(t.test) + op.POP_JUMP_IF_FALSE(end)
                       + self(t.body) + op.JUMP_ABSOLUTE(loop)
                + end)

    # for loop: the iterator stays on the stack while FOR_ITER pushes each
    # item for the target to store.  On exhaustion FOR_ITER jumps to `end`
    # with the iterator removed, which the trailing OffsetStack records.
    # t.orelse is not compiled.
    def visit_For(self, t):
        loop, end = Label(), Label()
        return (         self(t.iter) + op.GET_ITER
                + loop + op.FOR_ITER(end) + self(t.target)
                       + self(t.body) + op.JUMP_ABSOLUTE(loop)
                + end + OffsetStack())
class SetLineNo(Assembly): 49 | def __init__(self, line): 50 | self.line = line 51 | def line_nos(self, start): 52 | return ((start, self.line),) 53 | class Instruction(Assembly): 54 | def __init__(self, opcode, arg): 55 | self.opcode = opcode 56 | self.arg = arg 57 | self.length = 1 if arg is None else 3 58 | def encode(self, start, addresses): 59 | if self.opcode in dis.hasjabs: arg = addresses[self.arg] 60 | elif self.opcode in dis.hasjrel: arg = addresses[self.arg] - (start+3) 61 | else: arg = self.arg 62 | if arg is None: return bytes([self.opcode]) 63 | else: return bytes([self.opcode, arg % 256, arg // 256]) 64 | def plumb(self, depths): 65 | arg = 0 if isinstance(self.arg, Label) else self.arg 66 | depths.append(depths[-1] + dis.stack_effect(self.opcode, arg)) 67 | class OffsetStack(Assembly): 68 | def plumb(self, depths): 69 | depths.append(depths[-1] - 1) 70 | class Chain(Assembly): 71 | def __init__(self, assembly1, assembly2): 72 | self.part1 = assembly1 73 | self.part2 = assembly2 74 | self.length = assembly1.length + assembly2.length 75 | def resolve(self, start): 76 | return chain(self.part1.resolve(start), 77 | self.part2.resolve(start + self.part1.length)) 78 | def encode(self, start, addresses): 79 | return chain(self.part1.encode(start, addresses), 80 | self.part2.encode(start + self.part1.length, addresses)) 81 | def line_nos(self, start): 82 | return chain(self.part1.line_nos(start), 83 | self.part2.line_nos(start + self.part1.length)) 84 | def plumb(self, depths): 85 | self.part1.plumb(depths) 86 | self.part2.plumb(depths) 87 | 88 | def denotation(opcode): 89 | if opcode < dis.HAVE_ARGUMENT: 90 | return Instruction(opcode, None) 91 | else: 92 | return lambda arg: Instruction(opcode, arg) 93 | 94 | op = type('op', (), dict([(name, denotation(opcode)) 95 | for name, opcode in dis.opmap.items()])) 96 | def make_table(): 97 | table = collections.defaultdict(lambda: len(table)) 98 | return table 99 | 100 | def collect(table): 101 | return 
tuple(sorted(table, key=table.get)) 102 | def run(filename, module_name): 103 | f = open(filename) 104 | source = f.read() 105 | f.close() 106 | return module_from_ast(module_name, filename, ast.parse(source)) 107 | 108 | def module_from_ast(module_name, filename, t): 109 | code = code_for_module(module_name, filename, t) 110 | module = types.ModuleType(module_name, ast.get_docstring(t)) 111 | exec(code, module.__dict__) 112 | return module 113 | 114 | def code_for_module(module_name, filename, t): 115 | return CodeGen(filename, StubScope()).compile_module(t, module_name) 116 | 117 | class StubScope: freevars, cellvars, derefvars = (), (), () 118 | 119 | class CodeGen(ast.NodeVisitor): 120 | 121 | def __init__(self, filename, scope): 122 | self.filename = filename 123 | self.scope = scope 124 | self.constants = make_table() 125 | self.names = make_table() 126 | self.varnames = make_table() 127 | def compile_module(self, t, name): 128 | assembly = self(t.body) + self.load_const(None) + op.RETURN_VALUE 129 | return self.make_code(assembly, name, 0) 130 | 131 | def make_code(self, assembly, name, argcount): 132 | kwonlyargcount = 0 133 | nlocals = len(self.varnames) 134 | stacksize = plumb_depths(assembly) 135 | flags = ( (0x02 if nlocals else 0) 136 | | (0x10 if self.scope.freevars else 0) 137 | | (0x40 if not self.scope.derefvars else 0)) 138 | firstlineno, lnotab = make_lnotab(assembly) 139 | return types.CodeType(argcount, kwonlyargcount, 140 | nlocals, stacksize, flags, assemble(assembly), 141 | self.collect_constants(), 142 | collect(self.names), collect(self.varnames), 143 | self.filename, name, firstlineno, lnotab, 144 | self.scope.freevars, self.scope.cellvars) 145 | def __call__(self, t): 146 | if isinstance(t, list): return concat(map(self, t)) 147 | assembly = self.visit(t) 148 | return SetLineNo(t.lineno) + assembly if hasattr(t, 'lineno') else assembly 149 | def generic_visit(self, t): 150 | raise NotImplementedError() 151 | def load_const(self, 
constant): 152 | return op.LOAD_CONST(self.constants[constant, type(constant)]) 153 | 154 | def collect_constants(self): 155 | return tuple([constant for constant,_ in collect(self.constants)]) 156 | def visit_NameConstant(self, t): return self.load_const(t.value) # for None/True/False 157 | def visit_Num(self, t): return self.load_const(t.n) 158 | def visit_Str(self, t): return self.load_const(t.s) 159 | visit_Bytes = visit_Str 160 | def visit_Name(self, t): 161 | if isinstance(t.ctx, ast.Load): return self.load(t.id) 162 | elif isinstance(t.ctx, ast.Store): return self.store(t.id) 163 | else: assert False 164 | 165 | def load(self, name): return op.LOAD_NAME(self.names[name]) 166 | def store(self, name): return op.STORE_NAME(self.names[name]) 167 | def visit_Call(self, t): 168 | assert len(t.args) < 256 and len(t.keywords) < 256 169 | return (self(t.func) + self(t.args) + self(t.keywords) 170 | + op.CALL_FUNCTION((len(t.keywords) << 8) | len(t.args))) 171 | 172 | def visit_keyword(self, t): 173 | return self.load_const(t.arg) + self(t.value) 174 | def visit_Expr(self, t): 175 | return self(t.value) + op.POP_TOP 176 | def visit_Assign(self, t): 177 | def compose(left, right): return op.DUP_TOP + left + right 178 | return self(t.value) + reduce(compose, map(self, t.targets)) 179 | def visit_If(self, t): 180 | orelse, after = Label(), Label() 181 | return ( self(t.test) + op.POP_JUMP_IF_FALSE(orelse) 182 | + self(t.body) + op.JUMP_FORWARD(after) 183 | + orelse + self(t.orelse) 184 | + after) 185 | def visit_IfExp(self, t): 186 | orelse, after = Label(), Label() 187 | return ( self(t.test) + op.POP_JUMP_IF_FALSE(orelse) 188 | + self(t.body) + op.JUMP_FORWARD(after) 189 | + OffsetStack() 190 | + orelse + self(t.orelse) 191 | + after) 192 | def visit_Dict(self, t): 193 | return (concat([self(k) + self(v) 194 | for k, v in zip(t.keys, t.values)]) 195 | + op.BUILD_MAP(len(t.keys))) 196 | def visit_Subscript(self, t): 197 | return self(t.value) + self(t.slice.value) + 
self.subscr_ops[type(t.ctx)] 198 | subscr_ops = {ast.Load: op.BINARY_SUBSCR, ast.Store: op.STORE_SUBSCR} 199 | 200 | def visit_Attribute(self, t): 201 | sub_op = self.attr_ops[type(t.ctx)] 202 | return self(t.value) + sub_op(self.names[t.attr]) 203 | attr_ops = {ast.Load: op.LOAD_ATTR, ast.Store: op.STORE_ATTR} 204 | def visit_List(self, t): return self.visit_sequence(t, op.BUILD_LIST) 205 | def visit_Tuple(self, t): return self.visit_sequence(t, op.BUILD_TUPLE) 206 | 207 | def visit_sequence(self, t, build_op): 208 | if isinstance(t.ctx, ast.Load): 209 | return self(t.elts) + build_op(len(t.elts)) 210 | elif isinstance(t.ctx, ast.Store): 211 | return op.UNPACK_SEQUENCE(len(t.elts)) + self(t.elts) 212 | else: 213 | assert False 214 | def visit_UnaryOp(self, t): 215 | return self(t.operand) + self.ops1[type(t.op)] 216 | ops1 = {ast.UAdd: op.UNARY_POSITIVE, ast.Invert: op.UNARY_INVERT, 217 | ast.USub: op.UNARY_NEGATIVE, ast.Not: op.UNARY_NOT} 218 | def visit_BinOp(self, t): 219 | return self(t.left) + self(t.right) + self.ops2[type(t.op)] 220 | ops2 = {ast.Pow: op.BINARY_POWER, ast.Add: op.BINARY_ADD, 221 | ast.LShift: op.BINARY_LSHIFT, ast.Sub: op.BINARY_SUBTRACT, 222 | ast.RShift: op.BINARY_RSHIFT, ast.Mult: op.BINARY_MULTIPLY, 223 | ast.BitOr: op.BINARY_OR, ast.Mod: op.BINARY_MODULO, 224 | ast.BitAnd: op.BINARY_AND, ast.Div: op.BINARY_TRUE_DIVIDE, 225 | ast.BitXor: op.BINARY_XOR, ast.FloorDiv: op.BINARY_FLOOR_DIVIDE} 226 | def visit_Compare(self, t): 227 | [operator], [right] = t.ops, t.comparators 228 | cmp_index = dis.cmp_op.index(self.ops_cmp[type(operator)]) 229 | return self(t.left) + self(right) + op.COMPARE_OP(cmp_index) 230 | ops_cmp = {ast.Eq: '==', ast.NotEq: '!=', ast.Is: 'is', ast.IsNot: 'is not', 231 | ast.Lt: '<', ast.LtE: '<=', ast.In: 'in', ast.NotIn: 'not in', 232 | ast.Gt: '>', ast.GtE: '>='} 233 | def visit_BoolOp(self, t): 234 | op_jump = self.ops_bool[type(t.op)] 235 | def compose(left, right): 236 | after = Label() 237 | return left + 
op_jump(after) + OffsetStack() + right + after 238 | return reduce(compose, map(self, t.values)) 239 | ops_bool = {ast.And: op.JUMP_IF_FALSE_OR_POP, 240 | ast.Or: op.JUMP_IF_TRUE_OR_POP} 241 | def visit_Pass(self, t): 242 | return no_op 243 | 244 | def visit_Raise(self, t): 245 | return self(t.exc) + op.RAISE_VARARGS(1) 246 | def visit_Import(self, t): 247 | return concat([self.import_name(0, None, alias.name) 248 | + self.store(alias.asname or alias.name.split('.')[0]) 249 | for alias in t.names]) 250 | 251 | def visit_ImportFrom(self, t): 252 | fromlist = tuple([alias.name for alias in t.names]) 253 | return (self.import_name(t.level, fromlist, t.module) 254 | + concat([op.IMPORT_FROM(self.names[alias.name]) 255 | + self.store(alias.asname or alias.name) 256 | for alias in t.names]) 257 | + op.POP_TOP) 258 | 259 | def import_name(self, level, fromlist, name): 260 | return (self.load_const(level) 261 | + self.load_const(fromlist) 262 | + op.IMPORT_NAME(self.names[name])) 263 | def visit_While(self, t): 264 | loop, end = Label(), Label() 265 | return ( loop + self(t.test) + op.POP_JUMP_IF_FALSE(end) 266 | + self(t.body) + op.JUMP_ABSOLUTE(loop) 267 | + end) 268 | 269 | def visit_For(self, t): 270 | loop, end = Label(), Label() 271 | return ( self(t.iter) + op.GET_ITER 272 | + loop + op.FOR_ITER(end) + self(t.target) 273 | + self(t.body) + op.JUMP_ABSOLUTE(loop) 274 | + end + OffsetStack()) 275 | 276 | if __name__ == '__main__': 277 | sys.argv.pop(0) 278 | run(sys.argv[0], '__main__') 279 | -------------------------------------------------------------------------------- /article-code/tailbiter2.py: -------------------------------------------------------------------------------- 1 | import ast, collections, dis, types, sys 2 | from functools import reduce 3 | from itertools import chain 4 | from check_subset import check_conformity 5 | 6 | def assemble(assembly): 7 | return bytes(iter(assembly.encode(0, dict(assembly.resolve(0))))) 8 | def plumb_depths(assembly): 9 | 
depths = [0] 10 | assembly.plumb(depths) 11 | return max(depths) 12 | def make_lnotab(assembly): 13 | firstlineno, lnotab = None, [] 14 | byte, line = 0, None 15 | for next_byte, next_line in assembly.line_nos(0): 16 | if firstlineno is None: 17 | firstlineno = line = next_line 18 | elif line < next_line: 19 | while byte+255 < next_byte: 20 | lnotab.extend([255, 0]) 21 | byte = byte+255 22 | while line+255 < next_line: 23 | lnotab.extend([next_byte-byte, 255]) 24 | byte, line = next_byte, line+255 25 | if (byte, line) != (next_byte, next_line): 26 | lnotab.extend([next_byte-byte, next_line-line]) 27 | byte, line = next_byte, next_line 28 | return firstlineno or 1, bytes(lnotab) 29 | def concat(assemblies): 30 | return sum(assemblies, no_op) 31 | class Assembly: 32 | def __add__(self, other): 33 | return Chain(self, other) 34 | length = 0 35 | def resolve(self, start): 36 | return () 37 | def encode(self, start, addresses): 38 | return b'' 39 | def line_nos(self, start): 40 | return () 41 | def plumb(self, depths): 42 | pass 43 | 44 | no_op = Assembly() 45 | class Label(Assembly): 46 | def resolve(self, start): 47 | return ((self, start),) 48 | class SetLineNo(Assembly): 49 | def __init__(self, line): 50 | self.line = line 51 | def line_nos(self, start): 52 | return ((start, self.line),) 53 | class Instruction(Assembly): 54 | def __init__(self, opcode, arg): 55 | self.opcode = opcode 56 | self.arg = arg 57 | self.length = 1 if arg is None else 3 58 | def encode(self, start, addresses): 59 | if self.opcode in dis.hasjabs: arg = addresses[self.arg] 60 | elif self.opcode in dis.hasjrel: arg = addresses[self.arg] - (start+3) 61 | else: arg = self.arg 62 | if arg is None: return bytes([self.opcode]) 63 | else: return bytes([self.opcode, arg % 256, arg // 256]) 64 | def plumb(self, depths): 65 | arg = 0 if isinstance(self.arg, Label) else self.arg 66 | depths.append(depths[-1] + dis.stack_effect(self.opcode, arg)) 67 | class OffsetStack(Assembly): 68 | def plumb(self, 
depths): 69 | depths.append(depths[-1] - 1) 70 | class Chain(Assembly): 71 | def __init__(self, assembly1, assembly2): 72 | self.part1 = assembly1 73 | self.part2 = assembly2 74 | self.length = assembly1.length + assembly2.length 75 | def resolve(self, start): 76 | return chain(self.part1.resolve(start), 77 | self.part2.resolve(start + self.part1.length)) 78 | def encode(self, start, addresses): 79 | return chain(self.part1.encode(start, addresses), 80 | self.part2.encode(start + self.part1.length, addresses)) 81 | def line_nos(self, start): 82 | return chain(self.part1.line_nos(start), 83 | self.part2.line_nos(start + self.part1.length)) 84 | def plumb(self, depths): 85 | self.part1.plumb(depths) 86 | self.part2.plumb(depths) 87 | 88 | def denotation(opcode): 89 | if opcode < dis.HAVE_ARGUMENT: 90 | return Instruction(opcode, None) 91 | else: 92 | return lambda arg: Instruction(opcode, arg) 93 | 94 | op = type('op', (), dict([(name, denotation(opcode)) 95 | for name, opcode in dis.opmap.items()])) 96 | def make_table(): 97 | table = collections.defaultdict(lambda: len(table)) 98 | return table 99 | 100 | def collect(table): 101 | return tuple(sorted(table, key=table.get)) 102 | def run(filename, module_name): 103 | f = open(filename) 104 | source = f.read() 105 | f.close() 106 | return module_from_ast(module_name, filename, ast.parse(source)) 107 | 108 | def module_from_ast(module_name, filename, t): 109 | code = code_for_module(module_name, filename, t) 110 | module = types.ModuleType(module_name, ast.get_docstring(t)) 111 | exec(code, module.__dict__) 112 | return module 113 | 114 | def code_for_module(module_name, filename, t): 115 | t = desugar(t) 116 | check_conformity(t) 117 | return CodeGen(filename, top_scope(t)).compile_module(t, module_name) 118 | 119 | class CodeGen(ast.NodeVisitor): 120 | 121 | def __init__(self, filename, scope): 122 | self.filename = filename 123 | self.scope = scope 124 | self.constants = make_table() 125 | self.names = make_table() 
126 | self.varnames = make_table() 127 | def compile_module(self, t, name): 128 | assembly = self(t.body) + self.load_const(None) + op.RETURN_VALUE 129 | return self.make_code(assembly, name, 0) 130 | 131 | def make_code(self, assembly, name, argcount): 132 | kwonlyargcount = 0 133 | nlocals = len(self.varnames) 134 | stacksize = plumb_depths(assembly) 135 | flags = ( (0x02 if nlocals else 0) 136 | | (0x10 if self.scope.freevars else 0) 137 | | (0x40 if not self.scope.derefvars else 0)) 138 | firstlineno, lnotab = make_lnotab(assembly) 139 | return types.CodeType(argcount, kwonlyargcount, 140 | nlocals, stacksize, flags, assemble(assembly), 141 | self.collect_constants(), 142 | collect(self.names), collect(self.varnames), 143 | self.filename, name, firstlineno, lnotab, 144 | self.scope.freevars, self.scope.cellvars) 145 | def __call__(self, t): 146 | if isinstance(t, list): return concat(map(self, t)) 147 | assembly = self.visit(t) 148 | return SetLineNo(t.lineno) + assembly if hasattr(t, 'lineno') else assembly 149 | def generic_visit(self, t): 150 | raise NotImplementedError() 151 | def load_const(self, constant): 152 | return op.LOAD_CONST(self.constants[constant, type(constant)]) 153 | 154 | def collect_constants(self): 155 | return tuple([constant for constant,_ in collect(self.constants)]) 156 | def visit_NameConstant(self, t): return self.load_const(t.value) # for None/True/False 157 | def visit_Num(self, t): return self.load_const(t.n) 158 | def visit_Str(self, t): return self.load_const(t.s) 159 | visit_Bytes = visit_Str 160 | def visit_Name(self, t): 161 | if isinstance(t.ctx, ast.Load): return self.load(t.id) 162 | elif isinstance(t.ctx, ast.Store): return self.store(t.id) 163 | else: assert False 164 | 165 | def load(self, name): 166 | access = self.scope.access(name) 167 | if access == 'fast': return op.LOAD_FAST(self.varnames[name]) 168 | elif access == 'name': return op.LOAD_NAME(self.names[name]) 169 | elif access == 'deref': return 
op.LOAD_DEREF(self.cell_index(name)) 170 | else: assert False 171 | 172 | def store(self, name): 173 | access = self.scope.access(name) 174 | if access == 'fast': return op.STORE_FAST(self.varnames[name]) 175 | elif access == 'name': return op.STORE_NAME(self.names[name]) 176 | elif access == 'deref': return op.STORE_DEREF(self.cell_index(name)) 177 | else: assert False 178 | 179 | def cell_index(self, name): 180 | return self.scope.derefvars.index(name) 181 | def visit_Call(self, t): 182 | assert len(t.args) < 256 and len(t.keywords) < 256 183 | return (self(t.func) + self(t.args) + self(t.keywords) 184 | + op.CALL_FUNCTION((len(t.keywords) << 8) | len(t.args))) 185 | 186 | def visit_keyword(self, t): 187 | return self.load_const(t.arg) + self(t.value) 188 | def visit_Expr(self, t): 189 | return self(t.value) + op.POP_TOP 190 | def visit_Assign(self, t): 191 | def compose(left, right): return op.DUP_TOP + left + right 192 | return self(t.value) + reduce(compose, map(self, t.targets)) 193 | def visit_If(self, t): 194 | orelse, after = Label(), Label() 195 | return ( self(t.test) + op.POP_JUMP_IF_FALSE(orelse) 196 | + self(t.body) + op.JUMP_FORWARD(after) 197 | + orelse + self(t.orelse) 198 | + after) 199 | def visit_IfExp(self, t): 200 | orelse, after = Label(), Label() 201 | return ( self(t.test) + op.POP_JUMP_IF_FALSE(orelse) 202 | + self(t.body) + op.JUMP_FORWARD(after) 203 | + OffsetStack() 204 | + orelse + self(t.orelse) 205 | + after) 206 | def visit_Dict(self, t): 207 | return (op.BUILD_MAP(min(0xFFFF, len(t.keys))) 208 | + concat([self(v) + self(k) + op.STORE_MAP 209 | for k, v in zip(t.keys, t.values)])) 210 | def visit_Subscript(self, t): 211 | return self(t.value) + self(t.slice.value) + self.subscr_ops[type(t.ctx)] 212 | subscr_ops = {ast.Load: op.BINARY_SUBSCR, ast.Store: op.STORE_SUBSCR} 213 | 214 | def visit_Attribute(self, t): 215 | sub_op = self.attr_ops[type(t.ctx)] 216 | return self(t.value) + sub_op(self.names[t.attr]) 217 | attr_ops = 
{ast.Load: op.LOAD_ATTR, ast.Store: op.STORE_ATTR} 218 | def visit_List(self, t): return self.visit_sequence(t, op.BUILD_LIST) 219 | def visit_Tuple(self, t): return self.visit_sequence(t, op.BUILD_TUPLE) 220 | 221 | def visit_sequence(self, t, build_op): 222 | if isinstance(t.ctx, ast.Load): 223 | return self(t.elts) + build_op(len(t.elts)) 224 | elif isinstance(t.ctx, ast.Store): 225 | return op.UNPACK_SEQUENCE(len(t.elts)) + self(t.elts) 226 | else: 227 | assert False 228 | def visit_UnaryOp(self, t): 229 | return self(t.operand) + self.ops1[type(t.op)] 230 | ops1 = {ast.UAdd: op.UNARY_POSITIVE, ast.Invert: op.UNARY_INVERT, 231 | ast.USub: op.UNARY_NEGATIVE, ast.Not: op.UNARY_NOT} 232 | def visit_BinOp(self, t): 233 | return self(t.left) + self(t.right) + self.ops2[type(t.op)] 234 | ops2 = {ast.Pow: op.BINARY_POWER, ast.Add: op.BINARY_ADD, 235 | ast.LShift: op.BINARY_LSHIFT, ast.Sub: op.BINARY_SUBTRACT, 236 | ast.RShift: op.BINARY_RSHIFT, ast.Mult: op.BINARY_MULTIPLY, 237 | ast.BitOr: op.BINARY_OR, ast.Mod: op.BINARY_MODULO, 238 | ast.BitAnd: op.BINARY_AND, ast.Div: op.BINARY_TRUE_DIVIDE, 239 | ast.BitXor: op.BINARY_XOR, ast.FloorDiv: op.BINARY_FLOOR_DIVIDE} 240 | def visit_Compare(self, t): 241 | [operator], [right] = t.ops, t.comparators 242 | cmp_index = dis.cmp_op.index(self.ops_cmp[type(operator)]) 243 | return self(t.left) + self(right) + op.COMPARE_OP(cmp_index) 244 | ops_cmp = {ast.Eq: '==', ast.NotEq: '!=', ast.Is: 'is', ast.IsNot: 'is not', 245 | ast.Lt: '<', ast.LtE: '<=', ast.In: 'in', ast.NotIn: 'not in', 246 | ast.Gt: '>', ast.GtE: '>='} 247 | def visit_BoolOp(self, t): 248 | op_jump = self.ops_bool[type(t.op)] 249 | def compose(left, right): 250 | after = Label() 251 | return left + op_jump(after) + OffsetStack() + right + after 252 | return reduce(compose, map(self, t.values)) 253 | ops_bool = {ast.And: op.JUMP_IF_FALSE_OR_POP, 254 | ast.Or: op.JUMP_IF_TRUE_OR_POP} 255 | def visit_Pass(self, t): 256 | return no_op 257 | 258 | def 
visit_Raise(self, t): 259 | return self(t.exc) + op.RAISE_VARARGS(1) 260 | def visit_Import(self, t): 261 | return concat([self.import_name(0, None, alias.name) 262 | + self.store(alias.asname or alias.name.split('.')[0]) 263 | for alias in t.names]) 264 | 265 | def visit_ImportFrom(self, t): 266 | fromlist = tuple([alias.name for alias in t.names]) 267 | return (self.import_name(t.level, fromlist, t.module) 268 | + concat([op.IMPORT_FROM(self.names[alias.name]) 269 | + self.store(alias.asname or alias.name) 270 | for alias in t.names]) 271 | + op.POP_TOP) 272 | 273 | def import_name(self, level, fromlist, name): 274 | return (self.load_const(level) 275 | + self.load_const(fromlist) 276 | + op.IMPORT_NAME(self.names[name])) 277 | def visit_While(self, t): 278 | loop, end = Label(), Label() 279 | return ( loop + self(t.test) + op.POP_JUMP_IF_FALSE(end) 280 | + self(t.body) + op.JUMP_ABSOLUTE(loop) 281 | + end) 282 | 283 | def visit_For(self, t): 284 | loop, end = Label(), Label() 285 | return ( self(t.iter) + op.GET_ITER 286 | + loop + op.FOR_ITER(end) + self(t.target) 287 | + self(t.body) + op.JUMP_ABSOLUTE(loop) 288 | + end + OffsetStack()) 289 | def visit_Return(self, t): 290 | return ((self(t.value) if t.value else self.load_const(None)) 291 | + op.RETURN_VALUE) 292 | def visit_Function(self, t): 293 | code = self.sprout(t).compile_function(t) 294 | return self.make_closure(code, t.name) 295 | def sprout(self, t): 296 | return CodeGen(self.filename, self.scope.children[t]) 297 | def make_closure(self, code, name): 298 | if code.co_freevars: 299 | return (concat([op.LOAD_CLOSURE(self.cell_index(freevar)) 300 | for freevar in code.co_freevars]) 301 | + op.BUILD_TUPLE(len(code.co_freevars)) 302 | + self.load_const(code) + self.load_const(name) 303 | + op.MAKE_CLOSURE(0)) 304 | else: 305 | return (self.load_const(code) + self.load_const(name) 306 | + op.MAKE_FUNCTION(0)) 307 | def compile_function(self, t): 308 | self.load_const(ast.get_docstring(t)) 309 | for arg 
in t.args.args: 310 | self.varnames[arg.arg] 311 | assembly = self(t.body) + self.load_const(None) + op.RETURN_VALUE 312 | return self.make_code(assembly, t.name, len(t.args.args)) 313 | def visit_ClassDef(self, t): 314 | code = self.sprout(t).compile_class(t) 315 | return (op.LOAD_BUILD_CLASS + self.make_closure(code, t.name) 316 | + self.load_const(t.name) 317 | + self(t.bases) 318 | + op.CALL_FUNCTION(2 + len(t.bases)) 319 | + self.store(t.name)) 320 | def compile_class(self, t): 321 | docstring = ast.get_docstring(t) 322 | assembly = ( self.load('__name__') + self.store('__module__') 323 | + self.load_const(t.name) + self.store('__qualname__') 324 | + (no_op if docstring is None else 325 | self.load_const(docstring) + self.store('__doc__')) 326 | + self(t.body) + self.load_const(None) + op.RETURN_VALUE) 327 | return self.make_code(assembly, t.name, 0) 328 | def desugar(t): 329 | return ast.fix_missing_locations(Desugarer().visit(t)) 330 | 331 | class Desugarer(ast.NodeTransformer): 332 | def visit_Assert(self, t): 333 | t = self.generic_visit(t) 334 | result = ast.If(t.test, 335 | [], 336 | [ast.Raise(Call(ast.Name('AssertionError', load), 337 | [] if t.msg is None else [t.msg]), 338 | None)]) 339 | return ast.copy_location(result, t) 340 | def visit_Lambda(self, t): 341 | t = self.generic_visit(t) 342 | result = Function('', t.args, [ast.Return(t.body)]) 343 | return ast.copy_location(result, t) 344 | 345 | def visit_FunctionDef(self, t): 346 | t = self.generic_visit(t) 347 | fn = Function(t.name, t.args, t.body) 348 | for d in reversed(t.decorator_list): 349 | fn = Call(d, [fn]) 350 | result = ast.Assign([ast.Name(t.name, store)], fn) 351 | return ast.copy_location(result, t) 352 | def visit_ListComp(self, t): 353 | t = self.generic_visit(t) 354 | add_element = ast.Attribute(ast.Name('.elements', load), 'append', load) 355 | body = ast.Expr(Call(add_element, [t.elt])) 356 | for loop in reversed(t.generators): 357 | for test in reversed(loop.ifs): 358 | body = 
ast.If(test, [body], []) 359 | body = ast.For(loop.target, loop.iter, [body], []) 360 | fn = [body, 361 | ast.Return(ast.Name('.elements', load))] 362 | args = ast.arguments([ast.arg('.elements', None)], None, [], None, [], []) 363 | result = Call(Function('', args, fn), 364 | [ast.List([], load)]) 365 | return ast.copy_location(result, t) 366 | class Function(ast.FunctionDef): 367 | _fields = ('name', 'args', 'body') 368 | 369 | load, store = ast.Load(), ast.Store() 370 | 371 | def Call(fn, args): 372 | return ast.Call(fn, args, [], None, None) 373 | def top_scope(t): 374 | top = Scope(t, ()) 375 | top.visit(t) 376 | top.analyze(set()) 377 | return top 378 | class Scope(ast.NodeVisitor): 379 | def __init__(self, t, defs): 380 | self.t = t 381 | self.children = {} # Enclosed sub-scopes 382 | self.defs = set(defs) # Variables defined 383 | self.uses = set() # Variables referenced 384 | 385 | def visit_ClassDef(self, t): 386 | self.defs.add(t.name) 387 | for expr in t.bases: self.visit(expr) 388 | subscope = Scope(t, ()) 389 | self.children[t] = subscope 390 | for stmt in t.body: subscope.visit(stmt) 391 | 392 | def visit_Function(self, t): 393 | subscope = Scope(t, [arg.arg for arg in t.args.args]) 394 | self.children[t] = subscope 395 | for stmt in t.body: subscope.visit(stmt) 396 | 397 | def visit_Import(self, t): 398 | for alias in t.names: 399 | self.defs.add(alias.asname or alias.name.split('.')[0]) 400 | 401 | def visit_ImportFrom(self, t): 402 | for alias in t.names: 403 | self.defs.add(alias.asname or alias.name) 404 | 405 | def visit_Name(self, t): 406 | if isinstance(t.ctx, ast.Load): self.uses.add(t.id) 407 | elif isinstance(t.ctx, ast.Store): self.defs.add(t.id) 408 | else: assert False 409 | def analyze(self, parent_defs): 410 | self.fastvars = self.defs if isinstance(self.t, Function) else set() 411 | for child in self.children.values(): 412 | child.analyze(parent_defs | self.fastvars) 413 | child_uses = set([var for child in self.children.values() 414 
| for var in child.freevars]) 415 | uses = self.uses | child_uses 416 | self.cellvars = tuple(child_uses & self.fastvars) 417 | self.freevars = tuple(uses & (parent_defs - self.fastvars)) 418 | self.derefvars = self.cellvars + self.freevars 419 | def access(self, name): 420 | return ('deref' if name in self.derefvars else 421 | 'fast' if name in self.fastvars else 422 | 'name') 423 | 424 | if __name__ == '__main__': 425 | sys.argv.pop(0) 426 | run(sys.argv[0], '__main__') 427 | -------------------------------------------------------------------------------- /article-code/tailbiter2_py35.py: -------------------------------------------------------------------------------- 1 | import ast, collections, dis, types, sys 2 | from functools import reduce 3 | from itertools import chain 4 | from check_subset import check_conformity 5 | 6 | def assemble(assembly): 7 | return bytes(iter(assembly.encode(0, dict(assembly.resolve(0))))) 8 | def plumb_depths(assembly): 9 | depths = [0] 10 | assembly.plumb(depths) 11 | return max(depths) 12 | def make_lnotab(assembly): 13 | firstlineno, lnotab = None, [] 14 | byte, line = 0, None 15 | for next_byte, next_line in assembly.line_nos(0): 16 | if firstlineno is None: 17 | firstlineno = line = next_line 18 | elif line < next_line: 19 | while byte+255 < next_byte: 20 | lnotab.extend([255, 0]) 21 | byte = byte+255 22 | while line+255 < next_line: 23 | lnotab.extend([next_byte-byte, 255]) 24 | byte, line = next_byte, line+255 25 | if (byte, line) != (next_byte, next_line): 26 | lnotab.extend([next_byte-byte, next_line-line]) 27 | byte, line = next_byte, next_line 28 | return firstlineno or 1, bytes(lnotab) 29 | def concat(assemblies): 30 | return sum(assemblies, no_op) 31 | class Assembly: 32 | def __add__(self, other): 33 | return Chain(self, other) 34 | length = 0 35 | def resolve(self, start): 36 | return () 37 | def encode(self, start, addresses): 38 | return b'' 39 | def line_nos(self, start): 40 | return () 41 | def plumb(self, 
depths): 42 | pass 43 | 44 | no_op = Assembly() 45 | class Label(Assembly): 46 | def resolve(self, start): 47 | return ((self, start),) 48 | class SetLineNo(Assembly): 49 | def __init__(self, line): 50 | self.line = line 51 | def line_nos(self, start): 52 | return ((start, self.line),) 53 | class Instruction(Assembly): 54 | def __init__(self, opcode, arg): 55 | self.opcode = opcode 56 | self.arg = arg 57 | self.length = 1 if arg is None else 3 58 | def encode(self, start, addresses): 59 | if self.opcode in dis.hasjabs: arg = addresses[self.arg] 60 | elif self.opcode in dis.hasjrel: arg = addresses[self.arg] - (start+3) 61 | else: arg = self.arg 62 | if arg is None: return bytes([self.opcode]) 63 | else: return bytes([self.opcode, arg % 256, arg // 256]) 64 | def plumb(self, depths): 65 | arg = 0 if isinstance(self.arg, Label) else self.arg 66 | depths.append(depths[-1] + dis.stack_effect(self.opcode, arg)) 67 | class OffsetStack(Assembly): 68 | def plumb(self, depths): 69 | depths.append(depths[-1] - 1) 70 | class Chain(Assembly): 71 | def __init__(self, assembly1, assembly2): 72 | self.part1 = assembly1 73 | self.part2 = assembly2 74 | self.length = assembly1.length + assembly2.length 75 | def resolve(self, start): 76 | return chain(self.part1.resolve(start), 77 | self.part2.resolve(start + self.part1.length)) 78 | def encode(self, start, addresses): 79 | return chain(self.part1.encode(start, addresses), 80 | self.part2.encode(start + self.part1.length, addresses)) 81 | def line_nos(self, start): 82 | return chain(self.part1.line_nos(start), 83 | self.part2.line_nos(start + self.part1.length)) 84 | def plumb(self, depths): 85 | self.part1.plumb(depths) 86 | self.part2.plumb(depths) 87 | 88 | def denotation(opcode): 89 | if opcode < dis.HAVE_ARGUMENT: 90 | return Instruction(opcode, None) 91 | else: 92 | return lambda arg: Instruction(opcode, arg) 93 | 94 | op = type('op', (), dict([(name, denotation(opcode)) 95 | for name, opcode in dis.opmap.items()])) 96 | def 
make_table(): 97 | table = collections.defaultdict(lambda: len(table)) 98 | return table 99 | 100 | def collect(table): 101 | return tuple(sorted(table, key=table.get)) 102 | def run(filename, module_name): 103 | f = open(filename) 104 | source = f.read() 105 | f.close() 106 | return module_from_ast(module_name, filename, ast.parse(source)) 107 | 108 | def module_from_ast(module_name, filename, t): 109 | code = code_for_module(module_name, filename, t) 110 | module = types.ModuleType(module_name, ast.get_docstring(t)) 111 | exec(code, module.__dict__) 112 | return module 113 | 114 | def code_for_module(module_name, filename, t): 115 | t = desugar(t) 116 | check_conformity(t) 117 | return CodeGen(filename, top_scope(t)).compile_module(t, module_name) 118 | 119 | class CodeGen(ast.NodeVisitor): 120 | 121 | def __init__(self, filename, scope): 122 | self.filename = filename 123 | self.scope = scope 124 | self.constants = make_table() 125 | self.names = make_table() 126 | self.varnames = make_table() 127 | def compile_module(self, t, name): 128 | assembly = self(t.body) + self.load_const(None) + op.RETURN_VALUE 129 | return self.make_code(assembly, name, 0) 130 | 131 | def make_code(self, assembly, name, argcount): 132 | kwonlyargcount = 0 133 | nlocals = len(self.varnames) 134 | stacksize = plumb_depths(assembly) 135 | flags = ( (0x02 if nlocals else 0) 136 | | (0x10 if self.scope.freevars else 0) 137 | | (0x40 if not self.scope.derefvars else 0)) 138 | firstlineno, lnotab = make_lnotab(assembly) 139 | return types.CodeType(argcount, kwonlyargcount, 140 | nlocals, stacksize, flags, assemble(assembly), 141 | self.collect_constants(), 142 | collect(self.names), collect(self.varnames), 143 | self.filename, name, firstlineno, lnotab, 144 | self.scope.freevars, self.scope.cellvars) 145 | def __call__(self, t): 146 | if isinstance(t, list): return concat(map(self, t)) 147 | assembly = self.visit(t) 148 | return SetLineNo(t.lineno) + assembly if hasattr(t, 'lineno') else 
assembly 149 | def generic_visit(self, t): 150 | raise NotImplementedError() 151 | def load_const(self, constant): 152 | return op.LOAD_CONST(self.constants[constant, type(constant)]) 153 | 154 | def collect_constants(self): 155 | return tuple([constant for constant,_ in collect(self.constants)]) 156 | def visit_NameConstant(self, t): return self.load_const(t.value) # for None/True/False 157 | def visit_Num(self, t): return self.load_const(t.n) 158 | def visit_Str(self, t): return self.load_const(t.s) 159 | visit_Bytes = visit_Str 160 | def visit_Name(self, t): 161 | if isinstance(t.ctx, ast.Load): return self.load(t.id) 162 | elif isinstance(t.ctx, ast.Store): return self.store(t.id) 163 | else: assert False 164 | 165 | def load(self, name): 166 | access = self.scope.access(name) 167 | if access == 'fast': return op.LOAD_FAST(self.varnames[name]) 168 | elif access == 'name': return op.LOAD_NAME(self.names[name]) 169 | elif access == 'deref': return op.LOAD_DEREF(self.cell_index(name)) 170 | else: assert False 171 | 172 | def store(self, name): 173 | access = self.scope.access(name) 174 | if access == 'fast': return op.STORE_FAST(self.varnames[name]) 175 | elif access == 'name': return op.STORE_NAME(self.names[name]) 176 | elif access == 'deref': return op.STORE_DEREF(self.cell_index(name)) 177 | else: assert False 178 | 179 | def cell_index(self, name): 180 | return self.scope.derefvars.index(name) 181 | def visit_Call(self, t): 182 | assert len(t.args) < 256 and len(t.keywords) < 256 183 | return (self(t.func) + self(t.args) + self(t.keywords) 184 | + op.CALL_FUNCTION((len(t.keywords) << 8) | len(t.args))) 185 | 186 | def visit_keyword(self, t): 187 | return self.load_const(t.arg) + self(t.value) 188 | def visit_Expr(self, t): 189 | return self(t.value) + op.POP_TOP 190 | def visit_Assign(self, t): 191 | def compose(left, right): return op.DUP_TOP + left + right 192 | return self(t.value) + reduce(compose, map(self, t.targets)) 193 | def visit_If(self, t): 194 | 
def visit_sequence(self, t, build_op):
    """Compile a list/tuple display, or an unpacking assignment target.

    In a load context the elements are compiled first and `build_op`
    (called with the element count) follows them.  In a store context
    an UNPACK_SEQUENCE comes first, followed by the code that stores
    into each element.
    """
    elements = t.elts
    count = len(elements)
    if isinstance(t.ctx, ast.Load):
        return self(elements) + build_op(count)
    if isinstance(t.ctx, ast.Store):
        unpack = op.UNPACK_SEQUENCE(count)
        return unpack + self(elements)
    assert False
def visit_Import(self, t):
    """Compile `import a.b [as c], ...`.

    Each alias becomes an absolute import (level 0, no from-list)
    followed by a store to the bound name: the `as` name if given,
    otherwise the first component of the dotted module name.
    """
    pieces = []
    for alias in t.names:
        bound_name = alias.asname or alias.name.split('.')[0]
        importing = self.import_name(0, None, alias.name)
        pieces.append(importing + self.store(bound_name))
    return concat(pieces)
def desugar(t):
    """Rewrite tree `t` into the smaller core language the code generator
    handles, then fill in source locations on the newly made nodes."""
    return ast.fix_missing_locations(Desugarer().visit(t))


class Desugarer(ast.NodeTransformer):
    """Replace assert, lambda, def and list comprehensions by simpler nodes."""

    def visit_Assert(self, t):
        """`assert test, msg` -> `if test: (nothing) else: raise AssertionError(msg)`."""
        t = self.generic_visit(t)
        message_args = [] if t.msg is None else [t.msg]
        failure = ast.Raise(Call(ast.Name('AssertionError', load), message_args),
                            None)
        result = ast.If(t.test, [], [failure])
        return ast.copy_location(result, t)

    def visit_Lambda(self, t):
        """A lambda becomes a Function whose body returns the expression."""
        t = self.generic_visit(t)
        # Use the name CPython itself gives to lambdas, so the compiled
        # function's __name__ is '<lambda>' rather than an empty string.
        result = Function('<lambda>', t.args, [ast.Return(t.body)])
        return ast.copy_location(result, t)

    def visit_FunctionDef(self, t):
        """`def` becomes an assignment of a (decorated) Function expression."""
        t = self.generic_visit(t)
        fn = Function(t.name, t.args, t.body)
        # The decorator nearest the def is applied first.
        for d in reversed(t.decorator_list):
            fn = Call(d, [fn])
        result = ast.Assign([ast.Name(t.name, store)], fn)
        return ast.copy_location(result, t)

    def visit_ListComp(self, t):
        """A list comprehension becomes a call to a one-argument Function.

        The function receives an empty list as `.elements` (a name no
        source program can spell), appends to it inside nested for/if
        statements mirroring the generators, and returns it.
        """
        t = self.generic_visit(t)
        add_element = ast.Attribute(ast.Name('.elements', load), 'append', load)
        body = ast.Expr(Call(add_element, [t.elt]))
        # Wrap from the innermost clause outwards.
        for loop in reversed(t.generators):
            for test in reversed(loop.ifs):
                body = ast.If(test, [body], [])
            body = ast.For(loop.target, loop.iter, [body], [])
        fn = [body,
              ast.Return(ast.Name('.elements', load))]
        args = ast.arguments([ast.arg('.elements', None)], None, [], None, [], [])
        # Named like CPython's own comprehension functions.
        result = Call(Function('<listcomp>', args, fn),
                      [ast.List([], load)])
        return ast.copy_location(result, t)


class Function(ast.FunctionDef):
    """A function *expression*: a FunctionDef reduced to name, args and body."""
    _fields = ('name', 'args', 'body')


# Shared expression-context instances for the synthesized nodes.
load, store = ast.Load(), ast.Store()


def Call(fn, args):
    """Build a call node with positional arguments only."""
    return ast.Call(fn, args, [])


def top_scope(t):
    """Build and analyze the scope tree for module `t`; return its root."""
    top = Scope(t, ())
    top.visit(t)
    top.analyze(set())
    return top
def assemble(assembly):
    """Return the bytecode (a bytes object) for `assembly`.

    Label addresses are resolved first, then every part is encoded
    starting from address 0.
    """
    addresses = dict(assembly.resolve(0))
    return bytes(iter(assembly.encode(0, addresses)))


def plumb_depths(assembly):
    """Return the greatest stack depth recorded while walking `assembly`."""
    depths = [0]
    assembly.plumb(depths)
    return max(depths)


def make_lnotab(assembly):
    """Return (firstlineno, lnotab) for `assembly`.

    `lnotab` is a bytes object of (byte-delta, line-delta) pairs built
    from the (address, line) pairs the assembly reports.  Entries whose
    line number does not increase are ignored.  With no line info at
    all the first line number defaults to 1.
    """
    firstlineno, lnotab = None, []
    byte, line = 0, None
    for next_byte, next_line in assembly.line_nos(0):
        if firstlineno is None:
            firstlineno = line = next_line
        elif line < next_line:
            # Py3.6 changed to use signed bytes here, not unsigned.
            # This is a hack to keep the old logic, without taking
            # advantage of the new possibility of negative values:
            # every delta is simply capped at 127.
            while byte + 127 < next_byte:
                lnotab.extend([127, 0])
                byte = byte + 127
            while line + 127 < next_line:
                lnotab.extend([next_byte - byte, 127])
                byte, line = next_byte, line + 127
            if (byte, line) != (next_byte, next_line):
                lnotab.extend([next_byte - byte, next_line - line])
                byte, line = next_byte, next_line
    return firstlineno or 1, bytes(lnotab)


def concat(assemblies):
    """Chain a sequence of assemblies into one (no_op when empty)."""
    return sum(assemblies, no_op)


class Assembly:
    """Base class for assembly fragments; by itself an empty fragment."""

    def __add__(self, other):
        return Chain(self, other)

    length = 0  # Size in bytes of the encoded fragment.

    def resolve(self, start):
        """Return (label, address) pairs for labels in this fragment."""
        return ()

    def encode(self, start, addresses):
        """Return the encoded bytes, given the fragment's start address."""
        return b''

    def line_nos(self, start):
        """Return (address, line number) pairs for this fragment."""
        return ()

    def plumb(self, depths):
        """Append to `depths` the stack depth after each step."""
        pass


no_op = Assembly()


class Label(Assembly):
    """A zero-length marker whose address is wherever it gets placed."""

    def resolve(self, start):
        return ((self, start),)


class SetLineNo(Assembly):
    """A zero-length marker: code from here on belongs to line `line`."""

    def __init__(self, line):
        self.line = line

    def line_nos(self, start):
        return ((start, self.line),)


class Instruction(Assembly):
    """One bytecode instruction."""

    # The instruction encoding has changed: now every instruction takes
    # two bytes, including an argument, which is 0 if the instruction
    # doesn't use it.
    length = 2

    def __init__(self, opcode, arg):
        self.opcode = opcode
        self.arg = arg

    def encode(self, start, addresses):
        # For jumps, self.arg is a Label: absolute jumps take its address,
        # relative jumps the distance from the end of this instruction.
        if self.opcode in dis.hasjabs:
            arg = addresses[self.arg]
        elif self.opcode in dis.hasjrel:
            arg = addresses[self.arg] - (start + 2)
        else:
            arg = self.arg
        return bytes([self.opcode, arg or 0])

    def plumb(self, depths):
        # A Label is not a usable oparg for stack_effect; 0 stands in.
        arg = 0 if isinstance(self.arg, Label) else self.arg
        depths.append(depths[-1] + dis.stack_effect(self.opcode, arg))


class OffsetStack(Assembly):
    """A zero-length marker: the tracked stack depth is one less from here."""

    def plumb(self, depths):
        depths.append(depths[-1] - 1)


class Chain(Assembly):
    """Two fragments in sequence."""

    def __init__(self, assembly1, assembly2):
        self.part1 = assembly1
        self.part2 = assembly2
        self.length = assembly1.length + assembly2.length

    def resolve(self, start):
        return chain(self.part1.resolve(start),
                     self.part2.resolve(start + self.part1.length))

    def encode(self, start, addresses):
        return chain(self.part1.encode(start, addresses),
                     self.part2.encode(start + self.part1.length, addresses))

    def line_nos(self, start):
        return chain(self.part1.line_nos(start),
                     self.part2.line_nos(start + self.part1.length))

    def plumb(self, depths):
        self.part1.plumb(depths)
        self.part2.plumb(depths)


def denotation(opcode):
    """Return what `op.<NAME>` stands for: a ready-made Instruction for
    an opcode without an argument, else a constructor taking the argument."""
    if opcode < dis.HAVE_ARGUMENT:
        return Instruction(opcode, None)
    else:
        return lambda arg: Instruction(opcode, arg)


# Namespace with one attribute per opcode name known to `dis`.
op = type('op', (), dict([(name, denotation(opcode))
                          for name, opcode in dis.opmap.items()]))


def make_table():
    """Return a dict that numbers each new key 0, 1, 2, ... on first lookup."""
    table = collections.defaultdict(lambda: len(table))
    return table


def collect(table):
    """Return the keys of a make_table() dict as a tuple, in index order."""
    return tuple(sorted(table, key=table.get))


def run(filename, module_name):
    """Compile and execute the source file `filename` as module `module_name`."""
    # The context manager closes the file even when reading fails.
    with open(filename) as f:
        source = f.read()
    return module_from_ast(module_name, filename, ast.parse(source))
def code_for_module(module_name, filename, t):
    """Return the code object for the module whose syntax tree is `t`.

    The tree is desugared, passed to check_conformity (imported from
    check_subset; not shown here), and then compiled by a CodeGen that
    is given the module's analyzed top-level scope.
    """
    t = desugar(t)
    check_conformity(t)
    return CodeGen(filename, top_scope(t)).compile_module(t, module_name)

class CodeGen(ast.NodeVisitor):
    """Translate a desugared syntax tree into assembly, and from there
    into a code object.

    One CodeGen compiles one scope (module, function or class body);
    nested scopes are compiled by fresh instances made by sprout().
    Calling the instance on a node, or on a list of nodes, returns the
    assembly for it.
    """

    def __init__(self, filename, scope):
        self.filename  = filename
        self.scope     = scope
        # Each table hands out consecutive indices on first lookup; these
        # indices are the instruction arguments and also fix the order of
        # the tuples passed to the code object in make_code().
        self.constants = make_table()
        self.names     = make_table()
        self.varnames  = make_table()
    def compile_module(self, t, name):
        """Compile module body `t.body`, ending with `return None`."""
        assembly = self(t.body) + self.load_const(None) + op.RETURN_VALUE
        return self.make_code(assembly, name, 0)

    def make_code(self, assembly, name, argcount):
        """Package `assembly` plus the tables collected so far as a code object."""
        kwonlyargcount = 0
        nlocals = len(self.varnames)
        stacksize = plumb_depths(assembly)
        # Code-object flag bits; presumably CO_NEWLOCALS (0x02),
        # CO_NESTED (0x10) and CO_NOFREE (0x40) -- confirm against
        # CPython's code flags.
        flags = (  (0x02 if nlocals                  else 0)
                 | (0x10 if self.scope.freevars      else 0)
                 | (0x40 if not self.scope.derefvars else 0))
        firstlineno, lnotab = make_lnotab(assembly)
        return types.CodeType(argcount, kwonlyargcount,
                              nlocals, stacksize, flags, assemble(assembly),
                              self.collect_constants(),
                              collect(self.names), collect(self.varnames),
                              self.filename, name, firstlineno, lnotab,
                              self.scope.freevars, self.scope.cellvars)
    def __call__(self, t):
        """Return the assembly for node `t`, or for each node of list `t` in turn.

        A node that has a `lineno` gets a SetLineNo marker in front of
        its code.
        """
        if isinstance(t, list): return concat(map(self, t))
        assembly = self.visit(t)
        return SetLineNo(t.lineno) + assembly if hasattr(t, 'lineno') else assembly
    def generic_visit(self, t):
        """Reject any node type that has no visit_ method of its own."""
        raise NotImplementedError()
    def load_const(self, constant):
        """Return a LOAD_CONST for `constant`, entering it in the table if new."""
        # The type is part of the key, so constants that compare equal
        # but differ in type (1, 1.0, True) get separate entries.
        return op.LOAD_CONST(self.constants[constant, type(constant)])

    def collect_constants(self):
        """Return the constants in index order, without their type tags."""
        return tuple([constant for constant,_ in collect(self.constants)])
    def visit_NameConstant(self, t): return self.load_const(t.value) # for None/True/False
    def visit_Num(self, t):          return self.load_const(t.n)
    def visit_Str(self, t):          return self.load_const(t.s)
    # Bytes nodes keep their value in `.s` too, so the same method serves.
    visit_Bytes = visit_Str
    def visit_Name(self, t):
        """Compile a name as a load or a store, depending on its context."""
        if   isinstance(t.ctx, ast.Load):  return self.load(t.id)
        elif isinstance(t.ctx, ast.Store): return self.store(t.id)
        else: assert False

    def load(self, name):
        """Return the load instruction matching how this scope accesses `name`."""
        access = self.scope.access(name)
        if   access == 'fast':  return op.LOAD_FAST(self.varnames[name])
        elif access == 'name':  return op.LOAD_NAME(self.names[name])
        elif access == 'deref': return op.LOAD_DEREF(self.cell_index(name))
        else: assert False

    def store(self, name):
        """Return the store instruction matching how this scope accesses `name`."""
        access = self.scope.access(name)
        if   access == 'fast':  return op.STORE_FAST(self.varnames[name])
        elif access == 'name':  return op.STORE_NAME(self.names[name])
        elif access == 'deref': return op.STORE_DEREF(self.cell_index(name))
        else: assert False

    def cell_index(self, name):
        """Return the position of `name` among this scope's cell and free variables."""
        return self.scope.derefvars.index(name)
    def visit_Call(self, t):
        """Compile a call: callee, positional arguments, then keyword values.

        With keywords, a constant tuple of the keyword names follows the
        values and CALL_FUNCTION_KW gets the total argument count;
        otherwise CALL_FUNCTION gets the positional count.
        """
        assert len(t.args) + len(t.keywords) < 256
        if t.keywords:
            return (self(t.func) + self(t.args)
                    + self(t.keywords) + self.load_const(tuple([k.arg for k in t.keywords]))
                    + op.CALL_FUNCTION_KW(len(t.args) + len(t.keywords)))
        else:
            return (self(t.func) + self(t.args)
                    + op.CALL_FUNCTION(len(t.args)))

    def visit_keyword(self, t):
        """Compile only the value; visit_Call supplies the names separately."""
        return self(t.value)
    def visit_Expr(self, t):
        """Compile an expression statement: evaluate, then discard the result."""
        return self(t.value) + op.POP_TOP
    def visit_Assign(self, t):
        """Compile `a = b = ... = value`.

        The value is computed once; DUP_TOP is emitted before every
        target except the last, so each target has a copy to store.
        """
        def compose(left, right): return op.DUP_TOP + left + right
        return self(t.value) + reduce(compose, map(self, t.targets))
    def visit_If(self, t):
        """Compile an if statement with its (possibly empty) else part."""
        orelse, after = Label(), Label()
        return (           self(t.test) + op.POP_JUMP_IF_FALSE(orelse)
                         + self(t.body) + op.JUMP_FORWARD(after)
                + orelse + self(t.orelse)
                + after)
    def visit_IfExp(self, t):
        """Compile `body if test else orelse`."""
        orelse, after = Label(), Label()
        # Both branches are laid out one after the other, so the depth
        # tracker is set back by one (OffsetStack) before the second branch.
        return (           self(t.test) + op.POP_JUMP_IF_FALSE(orelse)
                         + self(t.body) + op.JUMP_FORWARD(after)
                + OffsetStack()
                + orelse + self(t.orelse)
                + after)
    def visit_Dict(self, t):
        """Compile a dict display: each key then its value, then BUILD_MAP."""
        return (concat([self(k) + self(v)
                        for k, v in zip(t.keys, t.values)])
                + op.BUILD_MAP(len(t.keys)))
    def visit_Subscript(self, t):
        """Compile `value[index]` as a load or a store.

        Reads the index from `t.slice.value`, so only slices having a
        `.value` are handled.
        """
        return self(t.value) + self(t.slice.value) + self.subscr_ops[type(t.ctx)]
    # Instruction per expression-context type, for subscripts.
    subscr_ops = {ast.Load: op.BINARY_SUBSCR, ast.Store: op.STORE_SUBSCR}

    def visit_Attribute(self, t):
        """Compile `value.attr` as a load or a store."""
        sub_op = self.attr_ops[type(t.ctx)]
        return self(t.value) + sub_op(self.names[t.attr])
    # Instruction constructor per expression-context type, for attributes.
    attr_ops = {ast.Load: op.LOAD_ATTR, ast.Store: op.STORE_ATTR}
    def visit_List(self, t):  return self.visit_sequence(t, op.BUILD_LIST)
    def visit_Tuple(self, t): return self.visit_sequence(t, op.BUILD_TUPLE)

    def visit_sequence(self, t, build_op):
        """Compile a list/tuple display (load) or an unpacking target (store)."""
        if   isinstance(t.ctx, ast.Load):
            return self(t.elts) + build_op(len(t.elts))
        elif isinstance(t.ctx, ast.Store):
            return op.UNPACK_SEQUENCE(len(t.elts)) + self(t.elts)
        else:
            assert False
    def visit_UnaryOp(self, t):
        """Compile the operand, then the instruction for the operator."""
        return self(t.operand) + self.ops1[type(t.op)]
    # Unary operator node type -> instruction.
    ops1 = {ast.UAdd: op.UNARY_POSITIVE,  ast.Invert: op.UNARY_INVERT,
            ast.USub: op.UNARY_NEGATIVE,  ast.Not:    op.UNARY_NOT}
    def visit_BinOp(self, t):
        """Compile left operand, right operand, then the operator's instruction."""
        return self(t.left) + self(t.right) + self.ops2[type(t.op)]
    # Binary operator node type -> instruction.
    ops2 = {ast.Pow:    op.BINARY_POWER,  ast.Add:  op.BINARY_ADD,
            ast.LShift: op.BINARY_LSHIFT, ast.Sub:  op.BINARY_SUBTRACT,
            ast.RShift: op.BINARY_RSHIFT, ast.Mult: op.BINARY_MULTIPLY,
            ast.BitOr:  op.BINARY_OR,     ast.Mod:  op.BINARY_MODULO,
            ast.BitAnd: op.BINARY_AND,    ast.Div:  op.BINARY_TRUE_DIVIDE,
            ast.BitXor: op.BINARY_XOR,    ast.FloorDiv: op.BINARY_FLOOR_DIVIDE}
    def visit_Compare(self, t):
        """Compile a comparison with exactly one operator.

        Chained comparisons fail the unpacking below with ValueError.
        """
        [operator], [right] = t.ops, t.comparators
        cmp_index = dis.cmp_op.index(self.ops_cmp[type(operator)])
        return self(t.left) + self(right) + op.COMPARE_OP(cmp_index)
    # Comparison node type -> operator spelling looked up in dis.cmp_op.
    ops_cmp = {ast.Eq: '==', ast.NotEq: '!=', ast.Is: 'is', ast.IsNot: 'is not',
               ast.Lt: '<',  ast.LtE:   '<=', ast.In: 'in', ast.NotIn: 'not in',
               ast.Gt: '>',  ast.GtE:   '>='}
    def visit_BoolOp(self, t):
        """Compile `a and b and ...` / `a or b or ...` with short-circuit jumps."""
        op_jump = self.ops_bool[type(t.op)]
        def compose(left, right):
            after = Label()
            # OffsetStack sets the tracked depth back by one for the
            # fall-through path that goes on to evaluate `right`.
            return left + op_jump(after) + OffsetStack() + right + after
        return reduce(compose, map(self, t.values))
    # Boolean operator node type -> short-circuiting jump constructor.
    ops_bool = {ast.And: op.JUMP_IF_FALSE_OR_POP,
                ast.Or:  op.JUMP_IF_TRUE_OR_POP}
    def visit_Pass(self, t):
        """`pass` compiles to nothing."""
        return no_op

    def visit_Raise(self, t):
        """Compile `raise exc`; the exception expression is always compiled."""
        return self(t.exc) + op.RAISE_VARARGS(1)
    def visit_Import(self, t):
        """Compile `import a.b [as c], ...`: import each, then bind its name."""
        return concat([self.import_name(0, None, alias.name)
                       + self.store(alias.asname or alias.name.split('.')[0])
                       for alias in t.names])

    def visit_ImportFrom(self, t):
        """Compile `from module import x [as y], ...`.

        The module is imported once; each name is fetched from it with
        IMPORT_FROM and stored, and the module is popped at the end.
        """
        fromlist = tuple([alias.name for alias in t.names])
        return (self.import_name(t.level, fromlist, t.module)
                + concat([op.IMPORT_FROM(self.names[alias.name])
                          + self.store(alias.asname or alias.name)
                          for alias in t.names])
                + op.POP_TOP)

    def import_name(self, level, fromlist, name):
        """Return code loading `level` and `fromlist`, then IMPORT_NAME `name`."""
        return (self.load_const(level)
                + self.load_const(fromlist)
                + op.IMPORT_NAME(self.names[name]))
    def visit_While(self, t):
        """Compile a while loop; `t.orelse` is not compiled."""
        loop, end = Label(), Label()
        return (  loop + self(t.test) + op.POP_JUMP_IF_FALSE(end)
                       + self(t.body) + op.JUMP_ABSOLUTE(loop)
                + end)

    def visit_For(self, t):
        """Compile a for loop; `t.orelse` is not compiled."""
        loop, end = Label(), Label()
        # The trailing OffsetStack presumably accounts for the iterator
        # being gone once FOR_ITER has jumped to `end` -- confirm.
        return (         self(t.iter) + op.GET_ITER
                + loop + op.FOR_ITER(end) + self(t.target)
                       + self(t.body) + op.JUMP_ABSOLUTE(loop)
                + end  + OffsetStack())
    def visit_Return(self, t):
        """Compile `return [value]`; a bare return yields None."""
        return ((self(t.value) if t.value else self.load_const(None))
                + op.RETURN_VALUE)
    def visit_Function(self, t):
        """Compile a Function expression: build its code, then make the closure."""
        code = self.sprout(t).compile_function(t)
        return self.make_closure(code, t.name)
    def sprout(self, t):
        """Return a fresh CodeGen for the sub-scope belonging to node `t`."""
        return CodeGen(self.filename, self.scope.children[t])
    def make_closure(self, code, name):
        """Return code that creates a function object from `code` and `name`.

        When `code` has free variables, their cells are loaded and
        packed in a tuple first, and MAKE_FUNCTION gets flag 0x08
        (per the dis documentation for 3.6: a closure tuple is supplied).
        """
        if code.co_freevars:
            return (concat([op.LOAD_CLOSURE(self.cell_index(freevar))
                            for freevar in code.co_freevars])
                    + op.BUILD_TUPLE(len(code.co_freevars))
                    + self.load_const(code) + self.load_const(name)
                    + op.MAKE_FUNCTION(0x08))
        else:
            return (self.load_const(code) + self.load_const(name)
                    + op.MAKE_FUNCTION(0))
    def compile_function(self, t):
        """Return the code object for the body of function node `t`."""
        # Result discarded on purpose: this enters the docstring (or None)
        # in the still-empty constants table, i.e. at index 0.
        self.load_const(ast.get_docstring(t))
        # Looking each argument up numbers it, so the arguments take the
        # first varnames slots, in order.
        for arg in t.args.args:
            self.varnames[arg.arg]
        assembly = self(t.body) + self.load_const(None) + op.RETURN_VALUE
        return self.make_code(assembly, t.name, len(t.args.args))
    def visit_ClassDef(self, t):
        """Compile a class statement.

        Calls the class builder with the class-body function, the class
        name and the bases, then stores the result under the class name.
        """
        code = self.sprout(t).compile_class(t)
        return (op.LOAD_BUILD_CLASS + self.make_closure(code, t.name)
                                    + self.load_const(t.name)
                                    + self(t.bases)
                + op.CALL_FUNCTION(2 + len(t.bases))
                + self.store(t.name))
    def compile_class(self, t):
        """Return the code object for the body of class node `t`.

        The body first sets __module__, __qualname__ and, when a
        docstring is present, __doc__.
        """
        docstring = ast.get_docstring(t)
        assembly = (  self.load('__name__')      + self.store('__module__')
                    + self.load_const(t.name)    + self.store('__qualname__')
                    + (no_op if docstring is None else
                       self.load_const(docstring) + self.store('__doc__'))
                    + self(t.body) + self.load_const(None) + op.RETURN_VALUE)
        return self.make_code(assembly, t.name, 0)
class Function(ast.FunctionDef):
    """A function *expression*: a FunctionDef reduced to name, args and body."""
    _fields = ('name', 'args', 'body')


# Shared expression-context instances for synthesized nodes.
load, store = ast.Load(), ast.Store()


def Call(fn, args):
    """Build a call node with positional arguments only."""
    return ast.Call(fn, args, [])


def top_scope(t):
    """Build and analyze the scope tree for module `t`; return its root."""
    top = Scope(t, ())
    top.visit(t)
    top.analyze(set())
    return top


class Scope(ast.NodeVisitor):
    """The variables defined and used in one module, class or function body.

    Visiting a tree fills in `defs`, `uses` and `children`; analyze()
    then derives `fastvars`, `cellvars`, `freevars` and `derefvars`.
    """

    def __init__(self, t, defs):
        self.t = t
        self.children = {}       # Enclosed sub-scopes, keyed by their node
        self.defs = set(defs)    # Variables defined
        self.uses = set()        # Variables referenced

    def visit_ClassDef(self, t):
        """The class name is defined here; bases are evaluated here as
        well, while the body gets a scope of its own."""
        self.defs.add(t.name)
        for expr in t.bases:
            self.visit(expr)
        subscope = Scope(t, ())
        self.children[t] = subscope
        for stmt in t.body:
            subscope.visit(stmt)

    def visit_Function(self, t):
        """A function body gets its own scope, pre-seeded with the arguments."""
        subscope = Scope(t, [arg.arg for arg in t.args.args])
        self.children[t] = subscope
        for stmt in t.body:
            subscope.visit(stmt)

    def visit_Import(self, t):
        """`import a.b` binds `a`; `import a.b as c` binds `c`."""
        for alias in t.names:
            self.defs.add(alias.asname or alias.name.split('.')[0])

    def visit_ImportFrom(self, t):
        """`from m import x [as y]` binds `y` if given, else `x`."""
        for alias in t.names:
            self.defs.add(alias.asname or alias.name)

    def visit_Name(self, t):
        """Record a name as used (load) or defined (store)."""
        if isinstance(t.ctx, ast.Load):
            self.uses.add(t.id)
        elif isinstance(t.ctx, ast.Store):
            self.defs.add(t.id)
        else:
            assert False

    def analyze(self, parent_defs):
        """Classify this scope's variables, children first.

        `parent_defs` holds the local variables of the enclosing
        functions.  Only a Function scope has fast locals.
        """
        self.fastvars = self.defs if isinstance(self.t, Function) else set()
        for child in self.children.values():
            child.analyze(parent_defs | self.fastvars)
        child_uses = set([var for child in self.children.values()
                              for var in child.freevars])
        uses = self.uses | child_uses
        # Sorted so that the order -- and with it the closure indices and
        # the code object's cellvars/freevars -- is the same on every run
        # instead of following hash-randomized set iteration order.
        self.cellvars = tuple(sorted(child_uses & self.fastvars))
        self.freevars = tuple(sorted(uses & (parent_defs - self.fastvars)))
        self.derefvars = self.cellvars + self.freevars

    def access(self, name):
        """Return how `name` is accessed here: 'deref', 'fast' or 'name'."""
        return ('deref' if name in self.derefvars else
                'fast'  if name in self.fastvars else
                'name')
import execfile 7 | 8 | parser = argparse.ArgumentParser( 9 | prog="byterun", 10 | description="Run Python programs with a Python bytecode interpreter.", 11 | ) 12 | parser.add_argument( 13 | '-m', dest='module', action='store_true', 14 | help="prog is a module name, not a file name.", 15 | ) 16 | parser.add_argument( 17 | '-v', '--verbose', dest='verbose', action='store_true', 18 | help="trace the execution of the bytecode.", 19 | ) 20 | parser.add_argument( 21 | 'prog', 22 | help="The program to run.", 23 | ) 24 | parser.add_argument( 25 | 'args', nargs=argparse.REMAINDER, 26 | help="Arguments to pass to the program.", 27 | ) 28 | args = parser.parse_args() 29 | 30 | if args.module: 31 | run_fn = execfile.run_python_module 32 | else: 33 | run_fn = execfile.run_python_file 34 | 35 | level = logging.DEBUG if args.verbose else logging.WARNING 36 | logging.basicConfig(level=level) 37 | 38 | argv = [args.prog] + args.args 39 | run_fn(args.prog, argv) 40 | -------------------------------------------------------------------------------- /byterun/execfile.py: -------------------------------------------------------------------------------- 1 | """Execute files of Python code.""" 2 | 3 | import builtins 4 | import imp 5 | import os 6 | import sys 7 | import tokenize 8 | 9 | from .interpreter import run 10 | 11 | 12 | # This code is ripped off from coverage.py. Define things it expects. 13 | open_source = tokenize.open 14 | NoSource = Exception 15 | 16 | def run_python_module(modulename, args): 17 | """Run a python module, as though with ``python -m name args...``. 18 | 19 | `modulename` is the name of the module, possibly a dot-separated name. 20 | `args` is the argument array to present as sys.argv, including the first 21 | element naming the module being executed. 22 | 23 | """ 24 | openfile = None 25 | glo, loc = globals(), locals() 26 | try: 27 | try: 28 | # Search for the module - inside its parent package, if any - using 29 | # standard import mechanics. 30 | if '.' 
in modulename: 31 | packagename, name = modulename.rsplit('.', 1) 32 | package = __import__(packagename, glo, loc, ['__path__']) 33 | searchpath = package.__path__ 34 | else: 35 | packagename, name = None, modulename 36 | searchpath = None # "top-level search" in imp.find_module() 37 | openfile, pathname, _ = imp.find_module(name, searchpath) 38 | 39 | # Complain if this is a magic non-file module. 40 | if openfile is None and pathname is None: 41 | raise NoSource( 42 | "module does not live in a file: %r" % modulename 43 | ) 44 | 45 | # If `modulename` is actually a package, not a mere module, then we 46 | # pretend to be Python 2.7 and try running its __main__.py script. 47 | if openfile is None: 48 | packagename = modulename 49 | name = '__main__' 50 | package = __import__(packagename, glo, loc, ['__path__']) 51 | searchpath = package.__path__ 52 | openfile, pathname, _ = imp.find_module(name, searchpath) 53 | except ImportError: 54 | _, err, _ = sys.exc_info() 55 | raise NoSource(str(err)) 56 | finally: 57 | if openfile: 58 | openfile.close() 59 | 60 | # Finally, hand the file off to run_python_file for execution. 61 | args[0] = pathname 62 | run_python_file(pathname, args, package=packagename) 63 | 64 | 65 | def run_python_file(filename, args, package=None): 66 | """Run a python file as if it were the main program on the command line. 67 | 68 | `filename` is the path to the file to execute, it need not be a .py file. 69 | `args` is the argument array to present as sys.argv, including the first 70 | element naming the file being executed. `package` is the name of the 71 | enclosing package, if any. 
72 | 73 | """ 74 | try: 75 | source_file = open_source(filename) 76 | except IOError: 77 | raise NoSource("No file to run: %r" % filename) 78 | with source_file: 79 | source = source_file.read() 80 | 81 | # Create a module to serve as __main__ 82 | old_main_mod = sys.modules['__main__'] 83 | main_mod = imp.new_module('__main__') 84 | sys.modules['__main__'] = main_mod 85 | main_mod.__file__ = filename 86 | if package: 87 | main_mod.__package__ = package 88 | main_mod.__builtins__ = builtins 89 | 90 | # Set sys.argv and the first path element properly. 91 | old_argv = sys.argv 92 | old_path0 = sys.path[0] 93 | sys.argv = args 94 | if package: 95 | sys.path[0] = '' 96 | else: 97 | sys.path[0] = os.path.abspath(os.path.dirname(filename)) 98 | 99 | try: 100 | code = compile(source, filename, "exec") 101 | run(code, main_mod.__dict__, None) 102 | finally: 103 | # Restore the old __main__ 104 | sys.modules['__main__'] = old_main_mod 105 | 106 | # Restore the old argv and path 107 | sys.argv = old_argv 108 | sys.path[0] = old_path0 109 | -------------------------------------------------------------------------------- /byterun/interpreter.py: -------------------------------------------------------------------------------- 1 | """A pure-Python Python bytecode interpreter.""" 2 | # Derived from Byterun by Ned Batchelder, based on pyvm2 by Paul 3 | # Swartz (z3p), from http://www.twistedmatrix.com/users/z3p/ 4 | 5 | import builtins, dis, operator, types 6 | 7 | class Function: 8 | __slots__ = [ 9 | '__name__', '__code__', '__globals__', '__defaults__', '__closure__', 10 | '__dict__', '__doc__', 11 | ] 12 | 13 | def __init__(self, name, code, globs, defaults, closure): 14 | self.__name__ = name or code.co_name 15 | self.__code__ = code 16 | self.__globals__ = globs 17 | self.__defaults__ = tuple(defaults) 18 | self.__closure__ = closure 19 | self.__dict__ = {} 20 | self.__doc__ = code.co_consts[0] if code.co_consts else None 21 | 22 | def __repr__(self): # pragma: no cover 23 
| return '' % (self.__name__, id(self)) 24 | 25 | def __get__(self, instance, owner): 26 | return self if instance is None else Method(instance, owner, self) 27 | 28 | def __call__(self, *args, **kwargs): 29 | code = self.__code__ 30 | argc = code.co_argcount 31 | varargs = 0 != (code.co_flags & 0x04) 32 | varkws = 0 != (code.co_flags & 0x08) 33 | params = code.co_varnames[slice(0, argc+varargs+varkws)] 34 | 35 | defaults = self.__defaults__ 36 | nrequired = -len(defaults) if defaults else argc 37 | 38 | f_locals = dict(zip(params[slice(nrequired, None)], defaults)) 39 | f_locals.update(dict(zip(params, args))) 40 | if varargs: 41 | f_locals[params[argc]] = args[slice(argc, None)] 42 | elif argc < len(args): 43 | raise TypeError("%s() takes up to %d positional argument(s) but got %d" 44 | % (self.__name__, argc, len(args))) 45 | if varkws: 46 | f_locals[params[-1]] = varkw_dict = {} 47 | for kw, value in kwargs.items(): 48 | if kw in params: 49 | f_locals[kw] = value 50 | elif varkws: 51 | varkw_dict[kw] = value 52 | else: 53 | raise TypeError("%s() got an unexpected keyword argument %r" 54 | % (self.__name__, kw)) 55 | missing = [v for v in params[slice(0, nrequired)] if v not in f_locals] 56 | if missing: 57 | raise TypeError("%s() missing %d required positional argument%s: %s" 58 | % (code.co_name, 59 | len(missing), 's' if 1 < len(missing) else '', 60 | ', '.join(map(repr, missing)))) 61 | 62 | return run_frame(code, self.__closure__, self.__globals__, f_locals) 63 | 64 | class Method: 65 | def __init__(self, obj, _class, func): 66 | self.__self__ = obj 67 | self._class = _class 68 | self.__func__ = func 69 | 70 | def __repr__(self): # pragma: no cover 71 | name = "%s.%s" % (self._class.__name__, self.__func__.__name__) 72 | return '' % (name, self.__self__) 73 | 74 | def __call__(self, *args, **kwargs): 75 | return self.__func__(self.__self__, *args, **kwargs) 76 | 77 | class Cell: 78 | def __init__(self, value): 79 | self.contents = value 80 | 81 | class 
VirtualMachineError(Exception): 82 | "For raising errors in the operation of the VM." 83 | 84 | def run(code, f_globals, f_locals): 85 | if f_globals is None: f_globals = builtins.globals() 86 | if f_locals is None: f_locals = f_globals 87 | if '__builtins__' not in f_globals: 88 | f_globals['__builtins__'] = builtins.__dict__ 89 | return run_frame(code, None, f_globals, f_locals) 90 | 91 | def run_frame(code, f_closure, f_globals, f_locals): 92 | return Frame(code, f_closure, f_globals, f_locals).run() 93 | 94 | class Frame: 95 | def __init__(self, f_code, f_closure, f_globals, f_locals): 96 | self.f_code = f_code 97 | self.f_globals = f_globals 98 | self.f_locals = f_locals 99 | 100 | self.f_builtins = f_globals.get('__builtins__') 101 | if isinstance(self.f_builtins, types.ModuleType): 102 | self.f_builtins = self.f_builtins.__dict__ 103 | if self.f_builtins is None: 104 | self.f_builtins = {'None': None} 105 | 106 | self.stack = [] 107 | 108 | self.f_lineno = f_code.co_firstlineno # XXX doesn't get updated 109 | self.f_lasti = 0 110 | 111 | self.cells = {} if f_code.co_cellvars or f_code.co_freevars else None 112 | for var in f_code.co_cellvars: 113 | self.cells[var] = Cell(self.f_locals.get(var)) 114 | if f_code.co_freevars: 115 | assert len(f_code.co_freevars) == len(f_closure) 116 | self.cells.update(zip(f_code.co_freevars, f_closure)) 117 | 118 | def __repr__(self): # pragma: no cover 119 | return ('' 120 | % (id(self), self.f_code.co_filename, self.f_lineno)) 121 | 122 | def run(self): 123 | while True: 124 | byte_name, arguments = self.parse_byte_and_args() 125 | outcome = self.dispatch(byte_name, arguments) 126 | if outcome: 127 | assert outcome == 'return' 128 | return self.pop() 129 | 130 | def parse_byte_and_args(self): 131 | code = self.f_code 132 | opcode = code.co_code[self.f_lasti] 133 | self.f_lasti = self.f_lasti + 1 134 | if opcode >= dis.HAVE_ARGUMENT: 135 | int_arg = ( code.co_code[self.f_lasti] 136 | + (code.co_code[self.f_lasti+1] << 8)) 
137 | self.f_lasti = self.f_lasti + 2 138 | if opcode in dis.hasconst: 139 | arg = code.co_consts[int_arg] 140 | elif opcode in dis.hasfree: 141 | if int_arg < len(code.co_cellvars): 142 | arg = code.co_cellvars[int_arg] 143 | else: 144 | arg = code.co_freevars[int_arg - len(code.co_cellvars)] 145 | elif opcode in dis.hasname: 146 | arg = code.co_names[int_arg] 147 | elif opcode in dis.haslocal: 148 | arg = code.co_varnames[int_arg] 149 | elif opcode in dis.hasjrel: 150 | arg = self.f_lasti + int_arg 151 | else: 152 | arg = int_arg 153 | return dis.opname[opcode], (arg,) 154 | return dis.opname[opcode], () 155 | 156 | def dispatch(self, byte_name, arguments): 157 | if byte_name.startswith('UNARY_'): 158 | self.unary_operator(byte_name.replace('UNARY_', '', 1)) 159 | elif byte_name.startswith('BINARY_'): 160 | self.binary_operator(byte_name.replace('BINARY_', '', 1)) 161 | else: 162 | return getattr(self, 'byte_%s' % byte_name)(*arguments) 163 | 164 | def top(self): 165 | return self.stack[-1] 166 | 167 | def push(self, val): 168 | self.stack.append(val) 169 | 170 | def pop(self): 171 | return self.stack.pop() 172 | 173 | def popn(self, n): 174 | vals = [self.stack.pop() for _ in range(n)] 175 | vals.reverse() 176 | return vals 177 | 178 | def jump(self, jump): 179 | self.f_lasti = jump 180 | 181 | def byte_POP_TOP(self): 182 | self.pop() 183 | 184 | def byte_DUP_TOP(self): 185 | self.push(self.top()) 186 | 187 | def byte_LOAD_CONST(self, const): 188 | self.push(const) 189 | 190 | def byte_LOAD_GLOBAL(self, name): # XXX not used by the compiler; just for comparison runs 191 | if name in self.f_globals: val = self.f_globals[name] 192 | elif name in self.f_builtins: val = self.f_builtins[name] 193 | else: raise NameError("name '%s' is not defined" % name) 194 | self.push(val) 195 | 196 | def byte_LOAD_NAME(self, name): 197 | if name in self.f_locals: val = self.f_locals[name] 198 | elif name in self.f_globals: val = self.f_globals[name] 199 | elif name in 
self.f_builtins: val = self.f_builtins[name] 200 | else: raise NameError("name '%s' is not defined" % name) 201 | self.push(val) 202 | 203 | def byte_STORE_NAME(self, name): 204 | self.f_locals[name] = self.pop() 205 | 206 | def byte_LOAD_FAST(self, name): 207 | if name not in self.f_locals: 208 | raise UnboundLocalError( 209 | "local variable '%s' referenced before assignment" % name) 210 | self.push(self.f_locals[name]) 211 | 212 | def byte_STORE_FAST(self, name): 213 | self.f_locals[name] = self.pop() 214 | 215 | def byte_LOAD_DEREF(self, name): 216 | self.push(self.cells[name].contents) 217 | 218 | def byte_STORE_DEREF(self, name): 219 | self.cells[name].contents = self.pop() 220 | 221 | UNARY_OPERATORS = { 222 | 'POSITIVE': operator.pos, 'NOT': operator.not_, 223 | 'NEGATIVE': operator.neg, 'INVERT': operator.invert, 224 | } 225 | 226 | def unary_operator(self, op): 227 | x = self.pop() 228 | self.push(self.UNARY_OPERATORS[op](x)) 229 | 230 | BINARY_OPERATORS = { 231 | 'POWER': pow, 'ADD': operator.add, 232 | 'LSHIFT': operator.lshift, 'SUBTRACT': operator.sub, 233 | 'RSHIFT': operator.rshift, 'MULTIPLY': operator.mul, 234 | 'OR': operator.or_, 'MODULO': operator.mod, 235 | 'AND': operator.and_, 'TRUE_DIVIDE': operator.truediv, 236 | 'XOR': operator.xor, 'FLOOR_DIVIDE': operator.floordiv, 237 | 'SUBSCR': operator.getitem, 238 | } 239 | 240 | def binary_operator(self, op): 241 | x, y = self.popn(2) 242 | self.push(self.BINARY_OPERATORS[op](x, y)) 243 | 244 | COMPARE_OPERATORS = [ 245 | operator.lt, 246 | operator.le, 247 | operator.eq, 248 | operator.ne, 249 | operator.gt, 250 | operator.ge, 251 | lambda x, y: x in y, 252 | lambda x, y: x not in y, 253 | lambda x, y: x is y, 254 | lambda x, y: x is not y, 255 | lambda x, y: issubclass(x, Exception) and issubclass(x, y), 256 | ] 257 | 258 | def byte_COMPARE_OP(self, opnum): 259 | x, y = self.popn(2) 260 | self.push(self.COMPARE_OPERATORS[opnum](x, y)) 261 | 262 | def byte_LOAD_ATTR(self, attr): 263 | obj = 
self.pop() 264 | val = getattr(obj, attr) 265 | self.push(val) 266 | 267 | def byte_STORE_ATTR(self, name): 268 | val, obj = self.popn(2) 269 | setattr(obj, name, val) 270 | 271 | def byte_STORE_SUBSCR(self): 272 | val, obj, subscr = self.popn(3) 273 | obj[subscr] = val 274 | 275 | def byte_BUILD_TUPLE(self, count): 276 | self.push(tuple(self.popn(count))) 277 | 278 | def byte_BUILD_LIST(self, count): 279 | self.push(self.popn(count)) 280 | 281 | def byte_BUILD_MAP(self, size): 282 | self.push({}) 283 | 284 | def byte_STORE_MAP(self): 285 | the_map, val, key = self.popn(3) 286 | the_map[key] = val 287 | self.push(the_map) 288 | 289 | def byte_UNPACK_SEQUENCE(self, count): 290 | seq = self.pop() 291 | for x in reversed(seq): 292 | self.push(x) 293 | 294 | def byte_LIST_APPEND(self, count): 295 | val = self.pop() 296 | self.stack[-count].append(val) 297 | 298 | def byte_JUMP_FORWARD(self, jump): 299 | self.jump(jump) 300 | 301 | def byte_JUMP_ABSOLUTE(self, jump): 302 | self.jump(jump) 303 | 304 | def byte_POP_JUMP_IF_TRUE(self, jump): # XXX not emitted by the compiler 305 | val = self.pop() 306 | if val: 307 | self.jump(jump) 308 | 309 | def byte_POP_JUMP_IF_FALSE(self, jump): 310 | val = self.pop() 311 | if not val: 312 | self.jump(jump) 313 | 314 | def byte_JUMP_IF_TRUE_OR_POP(self, jump): 315 | if self.top(): 316 | self.jump(jump) 317 | else: 318 | self.pop() 319 | 320 | def byte_JUMP_IF_FALSE_OR_POP(self, jump): 321 | if not self.top(): 322 | self.jump(jump) 323 | else: 324 | self.pop() 325 | 326 | def byte_SETUP_LOOP(self, dest): 327 | pass 328 | 329 | def byte_GET_ITER(self): 330 | self.push(iter(self.pop())) 331 | 332 | def byte_FOR_ITER(self, jump): 333 | void = object() 334 | element = next(self.top(), void) 335 | if element is void: 336 | self.pop() 337 | self.jump(jump) 338 | else: 339 | self.push(element) 340 | 341 | def byte_POP_BLOCK(self): 342 | pass 343 | 344 | def byte_RAISE_VARARGS(self, argc): 345 | assert argc == 1 346 | raise self.pop() 347 | 
348 | def byte_MAKE_FUNCTION(self, argc): 349 | name = self.pop() 350 | code = self.pop() 351 | defaults = self.popn(argc) 352 | self.push(Function(name, code, self.f_globals, defaults, None)) 353 | 354 | def byte_LOAD_CLOSURE(self, name): 355 | self.push(self.cells[name]) 356 | 357 | def byte_MAKE_CLOSURE(self, argc): 358 | name = self.pop() 359 | closure, code = self.popn(2) 360 | defaults = self.popn(argc) 361 | globs = self.f_globals 362 | self.push(Function(name, code, globs, defaults, closure)) 363 | 364 | def byte_CALL_FUNCTION(self, arg): 365 | return self.call_function(arg, [], {}) 366 | 367 | def byte_CALL_FUNCTION_VAR(self, arg): 368 | varargs = self.pop() 369 | return self.call_function(arg, varargs, {}) 370 | 371 | def byte_CALL_FUNCTION_KW(self, arg): 372 | kwargs = self.pop() 373 | return self.call_function(arg, [], kwargs) 374 | 375 | def byte_CALL_FUNCTION_VAR_KW(self, arg): 376 | varargs, kwargs = self.popn(2) 377 | return self.call_function(arg, varargs, kwargs) 378 | 379 | def call_function(self, oparg, varargs, kwargs): 380 | len_kw, len_pos = divmod(oparg, 256) 381 | namedargs = dict([self.popn(2) for i in range(len_kw)]) 382 | namedargs.update(kwargs) 383 | posargs = self.popn(len_pos) 384 | posargs.extend(varargs) 385 | func = self.pop() 386 | self.push(func(*posargs, **namedargs)) 387 | 388 | def byte_RETURN_VALUE(self): 389 | return 'return' 390 | 391 | def byte_IMPORT_NAME(self, name): 392 | # XXX ceval.c is slightly different: looks up '__import__' in f_builtins first 393 | level, fromlist = self.popn(2) 394 | val = __import__(name, self.f_globals, self.f_locals, fromlist, level) 395 | self.push(val) 396 | 397 | def byte_IMPORT_FROM(self, name): 398 | # XXX ceval.c is slightly different: turns AttributeError into ImportError 399 | self.push(getattr(self.top(), name)) 400 | 401 | def byte_LOAD_BUILD_CLASS(self): 402 | self.push(build_class) 403 | 404 | def build_class(func, name, *bases, **kwds): 405 | if not isinstance(func, Function): 
406 | raise TypeError("func must be a function") 407 | if not isinstance(name, str): 408 | raise TypeError("name is not a string") 409 | metaclass = kwds.pop('metaclass', None) 410 | if metaclass is None: 411 | metaclass = type(bases[0]) if bases else type 412 | if isinstance(metaclass, type): 413 | metaclass = calculate_metaclass(metaclass, bases) 414 | 415 | void = object() 416 | prepare = getattr(metaclass, '__prepare__', void) 417 | namespace = {} if prepare is void else prepare(name, bases, **kwds) 418 | 419 | cell = run_frame(func.__code__, func.__closure__, 420 | func.__globals__, namespace) 421 | 422 | cls = metaclass(name, bases, namespace) 423 | if isinstance(cell, Cell): 424 | cell.contents = cls 425 | return cls 426 | 427 | def calculate_metaclass(metaclass, bases): 428 | winner = metaclass 429 | for base in bases: 430 | t = type(base) 431 | if issubclass(t, winner): 432 | winner = t 433 | elif not issubclass(winner, t): 434 | raise TypeError("metaclass conflict", winner, t) 435 | return winner 436 | -------------------------------------------------------------------------------- /check_subset.py: -------------------------------------------------------------------------------- 1 | """ 2 | Check if a program conforms to our Python subset. 3 | XXX check that names are legal Python identifiers, since our 4 | bytecompile assumes it can add illegal ones without clashing 5 | """ 6 | 7 | import ast 8 | 9 | def check_conformity(t): 10 | Checker().visit(t) 11 | 12 | class Checker(ast.NodeVisitor): 13 | 14 | def __init__(self, scope_type='module', in_loop=False): 15 | self.scope_type = scope_type 16 | self.in_loop = in_loop 17 | 18 | def generic_visit(self, t): 19 | "Any node type we don't know about is an error." 
20 | assert False, "Unsupported syntax: %r" % (t,) 21 | 22 | def __call__(self, t): 23 | if isinstance(t, list): 24 | for child in t: 25 | self.visit(child) 26 | elif isinstance(t, ast.AST): 27 | self.visit(t) 28 | else: 29 | assert False, "Unexpected type: %r" % (t,) 30 | 31 | def visit_Module(self, t): 32 | assert self.scope_type == 'module', "Module inside %s" % self.scope_type 33 | self(t.body) 34 | 35 | def visit_Function(self, t): 36 | self.check_arguments(t.args) 37 | Checker('function', in_loop=False)(t.body) 38 | 39 | def visit_ClassDef(self, t): 40 | assert self.scope_type == 'module', ("Nested classes are not supported %r" 41 | % (t,)) 42 | self.check_identifier(t.name) 43 | self(t.bases) 44 | assert not t.keywords 45 | assert not t.starargs 46 | assert not t.kwargs 47 | assert not t.decorator_list 48 | Checker('class', in_loop=False)(t.body) 49 | 50 | def visit_Return(self, t): 51 | if t.value is not None: 52 | self(t.value) 53 | 54 | def visit_Assign(self, t): 55 | assert t.targets, "At least one target required: %r" % (t,) 56 | self(t.targets) 57 | self(t.value) 58 | 59 | def visit_For(self, t): 60 | self(t.target) 61 | self(t.iter) 62 | Checker(self.scope_type, in_loop=True)(t.body) 63 | assert not t.orelse 64 | 65 | def visit_While(self, t): 66 | self(t.test) 67 | Checker(self.scope_type, in_loop=True)(t.body) 68 | assert not t.orelse 69 | 70 | def visit_If(self, t): 71 | self(t.test) 72 | self(t.body) 73 | self(t.orelse) 74 | 75 | def visit_Raise(self, t): 76 | self(t.exc) 77 | assert not t.cause, "Cause argument not supported: %r" % (t,) 78 | 79 | def visit_Import(self, t): 80 | self(t.names) 81 | 82 | def visit_ImportFrom(self, t): 83 | self.check_identifier(t.module) 84 | self(t.names) 85 | 86 | def visit_alias(self, t): 87 | assert t.name != '*', "Star import not supported: %r" % (t,) 88 | self.check_identifier(t.name) 89 | if t.asname is not None: 90 | self.check_identifier(t.asname) 91 | 92 | def visit_Expr(self, t): 93 | self(t.value) 94 | 
95 | def visit_Pass(self, t): 96 | pass 97 | 98 | def visit_Break(self, t): 99 | assert False, "break not supported" 100 | 101 | def visit_BoolOp(self, t): 102 | assert type(t.op) in self.ops_bool, "Unsupported boolean op: %r" % (t,) 103 | self(t.values) 104 | ops_bool = {ast.And, ast.Or} 105 | 106 | def visit_BinOp(self, t): 107 | assert type(t.op) in self.ops2, "Unsupported binary op: %r" % (t,) 108 | self(t.left) 109 | self(t.right) 110 | ops2 = {ast.Pow, ast.Add, 111 | ast.LShift, ast.Sub, 112 | ast.RShift, ast.Mult, 113 | ast.BitOr, ast.Mod, 114 | ast.BitAnd, ast.Div, 115 | ast.BitXor, ast.FloorDiv} 116 | 117 | def visit_UnaryOp(self, t): 118 | assert type(t.op) in self.ops1, "Unsupported unary op: %r" % (t,) 119 | self(t.operand) 120 | ops1 = {ast.UAdd, ast.Invert, 121 | ast.USub, ast.Not} 122 | 123 | visit_IfExp = visit_If 124 | 125 | def visit_Dict(self, t): 126 | for k, v in zip(t.keys, t.values): 127 | self(v) 128 | self(k) 129 | 130 | def visit_Set(self, t): 131 | assert False, "Set constructor not supported: %r" % (t,) 132 | 133 | def visit_Compare(self, t): 134 | self(t.left) 135 | assert 1 == len(t.ops), "Complex comparisons not supported: %r" % (t,) 136 | assert type(t.ops[0]) in self.ops_cmp, "Unsupported compare op: %r" % (t,) 137 | assert len(t.ops) == len(t.comparators), "Wrong number of arguments: %r" % (t,) 138 | self(t.comparators[0]) 139 | ops_cmp = {ast.Eq, ast.NotEq, ast.Is, ast.IsNot, 140 | ast.Lt, ast.LtE, ast.In, ast.NotIn, 141 | ast.Gt, ast.GtE} 142 | 143 | def visit_Call(self, t): 144 | self(t.func) 145 | self(t.args) 146 | self(t.keywords) 147 | if t.starargs: self(t.starargs) 148 | if t.kwargs: self(t.kwargs) 149 | 150 | def visit_keyword(self, t): 151 | self.check_identifier(t.arg) 152 | self(t.value) 153 | 154 | def visit_Num(self, t): 155 | # -0.0 is distinct from +0.0, but my compiler would mistakenly 156 | # coalesce the two, if both appear among the constants. Likewise 157 | # for -0.0 as a component of a complex number. 
As a hack, instead 158 | # of handling this case correctly in the compiler, we just forbid 159 | # it. It's especially unlikely to crop up because the parser even 160 | # parses -0.0 as UnaryOp(op=USub(), operand=Num(0.0)) -- you'd 161 | # have to build the AST some other way, to get Num(-0.0). 162 | assert not has_negzero(t.n), "Negative-zero literals not supported: %r" % (t,) 163 | 164 | def visit_Str(self, t): 165 | pass 166 | 167 | visit_Bytes = visit_Str 168 | 169 | def visit_Attribute(self, t): 170 | self(t.value) 171 | self.check_identifier(t.attr) 172 | if isinstance(t.ctx, ast.Load): pass 173 | elif isinstance(t.ctx, ast.Store): pass 174 | else: assert False, "Only loads and stores are supported: %r" % (t,) 175 | 176 | def visit_Subscript(self, t): 177 | self(t.value) 178 | if isinstance(t.slice, ast.Index): 179 | if isinstance(t.ctx, ast.Load): pass 180 | elif isinstance(t.ctx, ast.Store): pass 181 | else: assert False, "Only loads and stores are supported: %r" % (t,) 182 | self(t.slice.value) 183 | else: 184 | assert False, "Only simple subscripts are supported: %r" % (t,) 185 | 186 | def visit_NameConstant(self, t): 187 | pass 188 | 189 | def visit_Name(self, t): 190 | self.check_identifier(t.id) 191 | if isinstance(t.ctx, ast.Load): pass 192 | elif isinstance(t.ctx, ast.Store): pass 193 | else: assert False, "Only loads and stores are supported: %r" % (t,) 194 | 195 | def visit_sequence(self, t): 196 | self(t.elts) 197 | # XXX make sure there are no stars in elts 198 | if isinstance(t.ctx, ast.Load): pass 199 | elif isinstance(t.ctx, ast.Store): pass 200 | else: assert False, "Only loads and stores are supported: %r" % (t,) 201 | 202 | visit_List = visit_sequence 203 | visit_Tuple = visit_sequence 204 | 205 | def check_arguments(self, args): 206 | for arg in args.args: self.check_arg(arg) 207 | if args.vararg: self.check_arg(args.vararg) 208 | assert not args.kwonlyargs, "Keyword-only args are not supported: %r" % (args,) 209 | if args.kwarg: 
self.check_arg(args.kwarg) 210 | assert not args.defaults, "Default values are not supported: %r" % (args,) 211 | assert not args.kw_defaults, "Keyword default values are not supported: %r" % (args,) 212 | 213 | def check_arg(self, arg): 214 | self.check_identifier(arg.arg) 215 | 216 | def check_identifier(self, name): 217 | assert isinstance(name, str), "An identifier must be a string: %r" % (name,) 218 | # Not a private, mangled name: 219 | # XXX also make sure there's no '.' inside (the compiler will add some sometimes) 220 | assert len(name) <= 2 or not name.startswith('__') or name.endswith('__'), \ 221 | "Mangled private names are not supported: %r" % (name,) 222 | 223 | def has_negzero(num): 224 | return (is_negzero(num) 225 | or (isinstance(num, complex) 226 | and (is_negzero(num.real) or is_negzero(num.imag)))) 227 | 228 | def is_negzero(num): 229 | return str(num) == '-0.0' 230 | -------------------------------------------------------------------------------- /compiler.py: -------------------------------------------------------------------------------- 1 | import ast, collections, dis, types, sys 2 | from functools import reduce 3 | from itertools import chain 4 | from check_subset import check_conformity 5 | 6 | def assemble(assembly): 7 | return bytes(iter(assembly.encode(0, dict(assembly.resolve(0))))) 8 | 9 | def plumb_depths(assembly): 10 | depths = [0] 11 | assembly.plumb(depths) 12 | return max(depths) 13 | 14 | def make_lnotab(assembly): 15 | firstlineno, lnotab = None, [] 16 | byte, line = 0, None 17 | for next_byte, next_line in assembly.line_nos(0): 18 | if firstlineno is None: 19 | firstlineno = line = next_line 20 | elif line < next_line: 21 | while byte+255 < next_byte: 22 | lnotab.extend([255, 0]) 23 | byte = byte+255 24 | while line+255 < next_line: 25 | lnotab.extend([next_byte-byte, 255]) 26 | byte, line = next_byte, line+255 27 | if (byte, line) != (next_byte, next_line): 28 | lnotab.extend([next_byte-byte, next_line-line]) 29 | byte, 
line = next_byte, next_line 30 | return firstlineno or 1, bytes(lnotab) 31 | 32 | def concat(assemblies): 33 | return sum(assemblies, no_op) 34 | 35 | class Assembly: 36 | def __add__(self, other): 37 | return Chain(self, other) 38 | length = 0 39 | def resolve(self, start): 40 | return () 41 | def encode(self, start, addresses): 42 | return b'' 43 | def line_nos(self, start): 44 | return () 45 | def plumb(self, depths): 46 | pass 47 | 48 | no_op = Assembly() 49 | 50 | class Label(Assembly): 51 | def resolve(self, start): 52 | return ((self, start),) 53 | 54 | class SetLineNo(Assembly): 55 | def __init__(self, line): 56 | self.line = line 57 | def line_nos(self, start): 58 | return ((start, self.line),) 59 | 60 | class Instruction(Assembly): 61 | def __init__(self, opcode, arg): 62 | self.opcode = opcode 63 | self.arg = arg 64 | self.length = 1 if arg is None else 3 65 | def encode(self, start, addresses): 66 | if self.opcode in dis.hasjabs: arg = addresses[self.arg] 67 | elif self.opcode in dis.hasjrel: arg = addresses[self.arg] - (start+3) 68 | else: arg = self.arg 69 | if arg is None: return bytes([self.opcode]) 70 | else: return bytes([self.opcode, arg % 256, arg // 256]) 71 | def plumb(self, depths): 72 | arg = 0 if isinstance(self.arg, Label) else self.arg 73 | depths.append(depths[-1] + dis.stack_effect(self.opcode, arg)) 74 | 75 | class Chain(Assembly): 76 | def __init__(self, assembly1, assembly2): 77 | self.part1 = assembly1 78 | self.part2 = assembly2 79 | self.length = assembly1.length + assembly2.length 80 | def resolve(self, start): 81 | return chain(self.part1.resolve(start), 82 | self.part2.resolve(start + self.part1.length)) 83 | def encode(self, start, addresses): 84 | return chain(self.part1.encode(start, addresses), 85 | self.part2.encode(start + self.part1.length, addresses)) 86 | def line_nos(self, start): 87 | return chain(self.part1.line_nos(start), 88 | self.part2.line_nos(start + self.part1.length)) 89 | def plumb(self, depths): 90 | 
class OffsetStack(Assembly):
    """Assembly piece whose only effect is on stack-depth bookkeeping.

    It records a depth one lower than the previous entry.  CodeGen places it
    after the jumps in `visit_IfExp` and `visit_BoolOp` and at the exit of
    `visit_For`.
    """
    def plumb(self, depths):
        depths.append(depths[-1] - 1)

def denotation(opcode):
    """Return the value `op.<NAME>` should have for `opcode`.

    Opcodes below dis.HAVE_ARGUMENT take no argument, so they denote a
    ready-made Instruction; the others denote a function from the argument
    to an Instruction.
    """
    if opcode < dis.HAVE_ARGUMENT:
        return Instruction(opcode, None)
    else:
        return lambda arg: Instruction(opcode, arg)

# A namespace with one attribute per opcode name, e.g. op.POP_TOP or
# op.LOAD_CONST(3).
op = type('op', (), {name: denotation(opcode)
                     for name, opcode in dis.opmap.items()})

class CodeGen(ast.NodeVisitor):
    """Compile a desugared AST into a code object for one scope.

    Calling the instance on a node (or a list of nodes) returns the assembly
    for it; `compile_module`, `compile_function` and `compile_class` turn a
    whole body into a `types.CodeType`.
    """

    def __init__(self, filename, scope):
        self.filename = filename
        self.scope = scope
        # Each table maps a key to its index, assigned on first lookup.
        self.constants = make_table()
        self.names = make_table()
        self.varnames = make_table()

    def compile_module(self, t, name):
        """Return the code object for module node `t`, named `name`."""
        assembly = self(t.body) + self.load_const(None) + op.RETURN_VALUE
        return self.make_code(assembly, name, 0, False, False)

    def make_code(self, assembly, name, argcount, has_varargs, has_varkws):
        """Assemble `assembly` and wrap it in a code object."""
        kwonlyargcount = 0
        nlocals = len(self.varnames)
        stacksize = plumb_depths(assembly)
        flags = (  (0x02 if nlocals                  else 0)   # CO_NEWLOCALS
                 | (0x04 if has_varargs              else 0)   # CO_VARARGS
                 | (0x08 if has_varkws               else 0)   # CO_VARKEYWORDS
                 | (0x10 if self.scope.freevars      else 0)   # CO_NESTED
                 | (0x40 if not self.scope.derefvars else 0))  # CO_NOFREE
        firstlineno, lnotab = make_lnotab(assembly)
        return types.CodeType(argcount, kwonlyargcount,
                              nlocals, stacksize, flags, assemble(assembly),
                              self.collect_constants(),
                              collect(self.names), collect(self.varnames),
                              self.filename, name, firstlineno, lnotab,
                              self.scope.freevars, self.scope.cellvars)

    def load_const(self, constant):
        """Return a LOAD_CONST for `constant`, registering it if new."""
        # Keyed by (value, type) so that equal values of different types,
        # such as 1 and 1.0 or True, get separate slots.
        return op.LOAD_CONST(self.constants[constant, type(constant)])

    def collect_constants(self):
        """Return the registered constants as a tuple in index order."""
        return tuple([constant for constant,_ in collect(self.constants)])

    def visit_NameConstant(self, t): return self.load_const(t.value)
    def visit_Num(self, t):          return self.load_const(t.n)
    def visit_Str(self, t):          return self.load_const(t.s)
    visit_Bytes = visit_Str

    def visit_Name(self, t):
        if   isinstance(t.ctx, ast.Load):  return self.load(t.id)
        elif isinstance(t.ctx, ast.Store): return self.store(t.id)
        else: assert False

    def load(self, name):
        """Return the load instruction matching how this scope accesses `name`."""
        access = self.scope.access(name)
        if   access == 'fast':  return op.LOAD_FAST(self.varnames[name])
        elif access == 'deref': return op.LOAD_DEREF(self.cell_index(name))
        elif access == 'name':  return op.LOAD_NAME(self.names[name])
        else: assert False

    def store(self, name):
        """Return the store instruction matching how this scope accesses `name`."""
        access = self.scope.access(name)
        if   access == 'fast':  return op.STORE_FAST(self.varnames[name])
        elif access == 'deref': return op.STORE_DEREF(self.cell_index(name))
        elif access == 'name':  return op.STORE_NAME(self.names[name])
        else: assert False

    def cell_index(self, name):
        return self.scope.derefvars.index(name)

    def visit_Call(self, t):
        assert len(t.args) < 256 and len(t.keywords) < 256
        opcode = (op.CALL_FUNCTION_VAR_KW if t.starargs and t.kwargs else
                  op.CALL_FUNCTION_VAR    if t.starargs else
                  op.CALL_FUNCTION_KW     if t.kwargs else
                  op.CALL_FUNCTION)
        # The operand packs the keyword count in the high byte and the
        # positional count in the low byte.
        return (self(t.func)
                + self(t.args)
                + self(t.keywords)
                + (self(t.starargs) if t.starargs else no_op)
                + (self(t.kwargs)   if t.kwargs   else no_op)
                + opcode((len(t.keywords) << 8) | len(t.args)))

    def visit_keyword(self, t):
        return self.load_const(t.arg) + self(t.value)

    def __call__(self, t):
        """Return the assembly for node `t`, or for each node of a list `t`."""
        if isinstance(t, list): return concat(map(self, t))
        assembly = self.visit(t)
        return SetLineNo(t.lineno) + assembly if hasattr(t, 'lineno') else assembly

    def generic_visit(self, t):
        # Reaching here means the node type has no visit_ method.
        assert False, t

    def visit_Expr(self, t):
        return self(t.value) + op.POP_TOP

    def visit_Assign(self, t):
        # Every target but the last stores a duplicate of the value.
        def compose(left, right): return op.DUP_TOP + left + right
        return self(t.value) + reduce(compose, map(self, t.targets))

    def visit_If(self, t):
        orelse, after = Label(), Label()
        return (           self(t.test) + op.POP_JUMP_IF_FALSE(orelse)
                         + self(t.body) + op.JUMP_FORWARD(after)
                + orelse + self(t.orelse)
                + after)

    def visit_IfExp(self, t):
        orelse, after = Label(), Label()
        return (           self(t.test) + op.POP_JUMP_IF_FALSE(orelse)
                         + self(t.body) + op.JUMP_FORWARD(after)
                + OffsetStack()
                + orelse + self(t.orelse)
                + after)

    def visit_Dict(self, t):
        # STORE_MAP is emitted after the value and then the key are loaded.
        return (op.BUILD_MAP(min(0xFFFF, len(t.keys)))
                + concat([self(v) + self(k) + op.STORE_MAP
                          for k, v in zip(t.keys, t.values)]))

    def visit_Subscript(self, t):
        return self(t.value) + self(t.slice.value) + self.subscr_ops[type(t.ctx)]
    subscr_ops = {ast.Load: op.BINARY_SUBSCR, ast.Store: op.STORE_SUBSCR}

    def visit_Attribute(self, t):
        sub_op = self.attr_ops[type(t.ctx)]
        return self(t.value) + sub_op(self.names[t.attr])
    attr_ops = {ast.Load: op.LOAD_ATTR, ast.Store: op.STORE_ATTR}

    def visit_List(self, t):  return self.visit_sequence(t, op.BUILD_LIST)
    def visit_Tuple(self, t): return self.visit_sequence(t, op.BUILD_TUPLE)

    def visit_sequence(self, t, build_op):
        if   isinstance(t.ctx, ast.Load):
            return self(t.elts) + build_op(len(t.elts))
        elif isinstance(t.ctx, ast.Store):
            # As an assignment target: unpack, then store each element.
            return op.UNPACK_SEQUENCE(len(t.elts)) + self(t.elts)
        else:
            assert False

    def visit_UnaryOp(self, t):
        return self(t.operand) + self.ops1[type(t.op)]
    ops1 = {ast.UAdd: op.UNARY_POSITIVE,  ast.Invert: op.UNARY_INVERT,
            ast.USub: op.UNARY_NEGATIVE,  ast.Not:    op.UNARY_NOT}

    def visit_BinOp(self, t):
        return self(t.left) + self(t.right) + self.ops2[type(t.op)]
    ops2 = {ast.Pow:    op.BINARY_POWER,  ast.Add:  op.BINARY_ADD,
            ast.LShift: op.BINARY_LSHIFT, ast.Sub:  op.BINARY_SUBTRACT,
            ast.RShift: op.BINARY_RSHIFT, ast.Mult: op.BINARY_MULTIPLY,
            ast.BitOr:  op.BINARY_OR,     ast.Mod:  op.BINARY_MODULO,
            ast.BitAnd: op.BINARY_AND,    ast.Div:  op.BINARY_TRUE_DIVIDE,
            ast.BitXor: op.BINARY_XOR,    ast.FloorDiv: op.BINARY_FLOOR_DIVIDE}

    def visit_Compare(self, t):
        # Only a single comparison is handled: the destructuring below
        # fails on a chain like `a < b < c`.
        [operator], [right] = t.ops, t.comparators
        cmp_index = dis.cmp_op.index(self.ops_cmp[type(operator)])
        return self(t.left) + self(right) + op.COMPARE_OP(cmp_index)
    ops_cmp = {ast.Eq: '==', ast.NotEq: '!=', ast.Is: 'is', ast.IsNot: 'is not',
               ast.Lt: '<',  ast.LtE:   '<=', ast.In: 'in', ast.NotIn: 'not in',
               ast.Gt: '>',  ast.GtE:   '>='}

    def visit_BoolOp(self, t):
        op_jump = self.ops_bool[type(t.op)]
        def compose(left, right):
            after = Label()
            return left + op_jump(after) + OffsetStack() + right + after
        return reduce(compose, map(self, t.values))
    ops_bool = {ast.And: op.JUMP_IF_FALSE_OR_POP,
                ast.Or:  op.JUMP_IF_TRUE_OR_POP}

    def visit_Pass(self, t):
        return no_op

    def visit_Raise(self, t):
        # A bare `raise` has no expression to compile; RAISE_VARARGS(0)
        # is the re-raise form.
        if t.exc is None:
            return op.RAISE_VARARGS(0)
        return self(t.exc) + op.RAISE_VARARGS(1)

    def visit_Import(self, t):
        return concat([self.import_name(0, None, alias.name)
                       + self.store(alias.asname or alias.name.split('.')[0])
                       for alias in t.names])

    def visit_ImportFrom(self, t):
        fromlist = tuple([alias.name for alias in t.names])
        return (self.import_name(t.level, fromlist, t.module)
                + concat([op.IMPORT_FROM(self.names[alias.name])
                          + self.store(alias.asname or alias.name)
                          for alias in t.names])
                + op.POP_TOP)

    def import_name(self, level, fromlist, name):
        return (self.load_const(level)
                + self.load_const(fromlist)
                + op.IMPORT_NAME(self.names[name]))

    def visit_While(self, t):
        loop, end = Label(), Label()
        return (  loop + self(t.test) + op.POP_JUMP_IF_FALSE(end)
                       + self(t.body) + op.JUMP_ABSOLUTE(loop)
                + end)

    def visit_For(self, t):
        loop, end = Label(), Label()
        return (         self(t.iter) + op.GET_ITER
                + loop + op.FOR_ITER(end) + self(t.target)
                       + self(t.body) + op.JUMP_ABSOLUTE(loop)
                + end  + OffsetStack())

    def visit_Return(self, t):
        return ((self(t.value) if t.value else self.load_const(None))
                + op.RETURN_VALUE)

    def visit_Function(self, t):
        code = self.sprout(t).compile_function(t)
        return self.make_closure(code, t.name)

    def sprout(self, t):
        """Return a fresh CodeGen for the sub-scope belonging to node `t`."""
        return CodeGen(self.filename, self.scope.children[t])

    def make_closure(self, code, name):
        """Return assembly that builds a function object from `code`."""
        if code.co_freevars:
            # Pass the cells for the free variables along as a tuple.
            return (concat([op.LOAD_CLOSURE(self.cell_index(freevar))
                            for freevar in code.co_freevars])
                    + op.BUILD_TUPLE(len(code.co_freevars))
                    + self.load_const(code) + self.load_const(name)
                    + op.MAKE_CLOSURE(0))
        else:
            return (self.load_const(code) + self.load_const(name)
                    + op.MAKE_FUNCTION(0))

    def compile_function(self, t):
        """Return the code object for Function node `t`."""
        # Called only for its side effect: the docstring (or None) is
        # registered in the constants table before any other constant.
        self.load_const(ast.get_docstring(t))
        # Likewise register the parameters first, in declaration order.
        for arg in t.args.args:
            self.varnames[arg.arg]
        if t.args.vararg: self.varnames[t.args.vararg.arg]
        if t.args.kwarg:  self.varnames[t.args.kwarg.arg]
        assembly = self(t.body) + self.load_const(None) + op.RETURN_VALUE
        return self.make_code(assembly, t.name,
                              len(t.args.args), t.args.vararg, t.args.kwarg)

    def visit_ClassDef(self, t):
        code = self.sprout(t).compile_class(t)
        return (op.LOAD_BUILD_CLASS + self.make_closure(code, t.name)
                                    + self.load_const(t.name)
                                    + self(t.bases)
                + op.CALL_FUNCTION(2 + len(t.bases))
                + self.store(t.name))

    def compile_class(self, t):
        """Return the code object for the body of class node `t`."""
        docstring = ast.get_docstring(t)
        assembly = (  self.load('__name__')      + self.store('__module__')
                    + self.load_const(t.name)    + self.store('__qualname__')
                    + (no_op if docstring is None else
                       self.load_const(docstring) + self.store('__doc__'))
                    + self(t.body)
                    + self.load_const(None) + op.RETURN_VALUE)
        return self.make_code(assembly, t.name, 0, False, False)
def make_table():
    """Return a dict that gives each new key the next index, 0, 1, 2, ...

    Merely looking a key up registers it.
    """
    table = collections.defaultdict(lambda: len(table))
    return table

def collect(table):
    """Return the keys of a `make_table()` table as a tuple in index order."""
    return tuple(sorted(table, key=table.get))

def load_file(filename, module_name):
    """Compile and run the source file `filename` as module `module_name`.

    Returns the resulting module object.
    """
    # `with` closes the file even if reading raises.
    with open(filename) as f:
        source = f.read()
    return module_from_ast(module_name, filename, ast.parse(source))

def module_from_ast(module_name, filename, t):
    """Compile module AST `t`, execute it in a new module, and return that."""
    code = code_for_module(module_name, filename, t)
    module = types.ModuleType(module_name, ast.get_docstring(t))
    exec(code, module.__dict__)
    return module

def code_for_module(module_name, filename, t):
    """Return the code object for module AST `t`."""
    t = desugar(t)
    check_conformity(t)
    return CodeGen(filename, top_scope(t)).compile_module(t, module_name)

def desugar(t):
    """Return `t` rewritten by Desugarer, with source locations filled in."""
    return ast.fix_missing_locations(Desugarer().visit(t))

def rewriter(rewrite):
    """Decorator turning `rewrite(self, node)` into a NodeTransformer method.

    The children are transformed first, and the replacement node gets the
    source location of the node it replaces.
    """
    def visit(self, t):
        return ast.copy_location(rewrite(self, self.generic_visit(t)),
                                 t)
    return visit

def Call(fn, args):
    """Return a call node applying `fn` to positional `args` only."""
    return ast.Call(fn, args, [], None, None)

class Desugarer(ast.NodeTransformer):
    """Rewrite assert, lambda, def and list comprehensions into simpler nodes."""

    @rewriter
    def visit_Assert(self, t):
        # assert test, msg  ==>  if test: pass; else: raise AssertionError(msg)
        return ast.If(t.test,
                      [],
                      [ast.Raise(Call(ast.Name('AssertionError', load),
                                      [] if t.msg is None else [t.msg]),
                                 None)])

    @rewriter
    def visit_Lambda(self, t):
        # Named as CPython names lambdas, instead of the empty string.
        return Function('<lambda>', t.args, [ast.Return(t.body)])

    @rewriter
    def visit_FunctionDef(self, t):
        # def f(...): ...  ==>  f = decorator(...(Function))
        fn = Function(t.name, t.args, t.body)
        for d in reversed(t.decorator_list):
            fn = Call(d, [fn])
        return ast.Assign([ast.Name(t.name, store)], fn)

    @rewriter
    def visit_ListComp(self, t):
        # Becomes a call of a one-argument function on a fresh list; the
        # argument is named '.0', and the nested loops append to it.
        result_append = ast.Attribute(ast.Name('.0', load), 'append', load)
        body = ast.Expr(Call(result_append, [t.elt]))
        for loop in reversed(t.generators):
            for test in reversed(loop.ifs):
                body = ast.If(test, [body], [])
            body = ast.For(loop.target, loop.iter, [body], [])
        fn = [body,
              ast.Return(ast.Name('.0', load))]
        args = ast.arguments([ast.arg('.0', None)], None, [], None, [], [])
        # Named as CPython names list comprehensions, instead of ''.
        return Call(Function('<listcomp>', args, fn),
                    [ast.List([], load)])

class Function(ast.FunctionDef):
    """A function as an expression node: just a name, arguments and a body."""
    _fields = ('name', 'args', 'body')

load, store = ast.Load(), ast.Store()

def top_scope(t):
    """Return the analyzed Scope tree for module AST `t`."""
    top = Scope(t, ())
    top.visit(t)
    top.analyze(set())
    return top

class Scope(ast.NodeVisitor):
    """The variables defined and used in one module, class or function.

    Visiting the scope's node collects `defs`, `uses` and `children`;
    `analyze` then computes `local_defs`, `cellvars`, `freevars` and
    `derefvars`, which `access` consults.
    """

    def __init__(self, t, defs):
        self.t = t
        self.children = {}       # Enclosed sub-scopes
        self.defs = set(defs)    # Variables defined
        self.uses = set()        # Variables referenced

    def visit_ClassDef(self, t):
        self.defs.add(t.name)
        # Base-class expressions are visited in this scope, the body in
        # the new one.
        for expr in t.bases: self.visit(expr)
        subscope = Scope(t, ())
        self.children[t] = subscope
        for stmt in t.body: subscope.visit(stmt)

    def visit_Function(self, t):
        all_args = list(t.args.args) + [t.args.vararg, t.args.kwarg]
        subscope = Scope(t, [arg.arg for arg in all_args if arg])
        self.children[t] = subscope
        for stmt in t.body: subscope.visit(stmt)

    def visit_Import(self, t):
        for alias in t.names:
            self.defs.add(alias.asname or alias.name.split('.')[0])

    def visit_ImportFrom(self, t):
        for alias in t.names:
            self.defs.add(alias.asname or alias.name)

    def visit_Name(self, t):
        if   isinstance(t.ctx, ast.Load):  self.uses.add(t.id)
        elif isinstance(t.ctx, ast.Store): self.defs.add(t.id)
        else: assert False

    def analyze(self, parent_defs):
        """Classify this scope's variables, given the enclosing functions' locals.

        Sets `cellvars` (locals used by sub-scopes), `freevars` (variables
        taken from `parent_defs`) and `derefvars` (both, cells first).
        """
        # Only function scopes have locals that sub-scopes can close over.
        self.local_defs = self.defs if isinstance(self.t, Function) else set()
        for child in self.children.values():
            child.analyze(parent_defs | self.local_defs)
        child_uses = set([var for child in self.children.values()
                              for var in child.freevars])
        uses = self.uses | child_uses
        # Sorted so the order does not depend on set iteration order, which
        # for strings changes from run to run under hash randomization.
        self.cellvars = tuple(sorted(child_uses & self.local_defs))
        self.freevars = tuple(sorted(uses & (parent_defs - self.local_defs)))
        self.derefvars = self.cellvars + self.freevars

    def access(self, name):
        """Return how `name` is accessed here: 'deref', 'fast' or 'name'."""
        return ('deref' if name in self.derefvars else
                'fast'  if name in self.local_defs else
                'name')

if __name__ == '__main__':
    sys.argv.pop(0)
    load_file(sys.argv[0], '__main__')
DONE 26 | 27 | * Produce reasonable error messages. 28 | 29 | * Extend the tiny grammar a bit to if-statements so we'll have to 30 | contend with indentation. 31 | 32 | * Sketch a lexer to go with the 33 | parser. (https://github.com/hausdorff/pow likely to help a lot.) 34 | 35 | * Sketch ways to scale this up to the full grammar, and pick one. 36 | (This'll include deciding on a parsing algorithm! LL(1), PEG, 37 | derivatives?) 38 | 39 | * Make sure all the final code stays within the Tailbiter subset of 40 | Python, for self-hosting. 41 | -------------------------------------------------------------------------------- /grammar/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darius/tailbiter/2c4f0365b720c39495fabcca9a60899ac0a7df96/grammar/__init__.py -------------------------------------------------------------------------------- /grammar/metagrammar.py: -------------------------------------------------------------------------------- 1 | """ 2 | Process Python's LL(1) grammar. 3 | For the moment we just recognize it (`pip install parson`). 4 | """ 5 | 6 | import tokenize 7 | from parson import Grammar, chain, delay, either, empty, label, one_that, plus, star 8 | 9 | g = r""" 10 | grammar: _ defn* !/./. 11 | 12 | defn: id ':'_ e :hug. 13 | 14 | e: e1. 15 | 16 | e1: term ('|'_ e1 :either)?. 17 | 18 | term: factor* :Chain. 19 | 20 | factor: primary ( '*'_ :star 21 | | '+'_ :plus)?. 22 | 23 | primary: '('_ e ')'_ 24 | | '['_ e ']'_ :Optional 25 | | qstring :Literal 26 | | tokenid :Token 27 | | id !':' :RuleRef. 28 | 29 | qstring: /'([^']*)'/_. 30 | tokenid: /([A-Z_]+)/_. 31 | id: /([a-z_]+)/_. 32 | _ = /\s*/. 
33 | """ 34 | 35 | def foldr1(f, xs): 36 | return xs[0] if len(xs) == 1 else f(xs[0], foldr1(f, xs[1:])) 37 | 38 | def Chain(*pes): return empty if not pes else foldr1(chain, pes) 39 | def Optional(pe): return pe.maybe() 40 | def Literal(s): return label(one_that(lambda t: t[1] == s), repr(s)) # XXX 41 | def Token(name): return label(one_that(lambda t: t[1] == name), name) # XXX 42 | def RuleRef(name): return delay((lambda: rules[name]), name) 43 | 44 | grammar = Grammar(g)(**globals()) 45 | 46 | subset = open('subset').read() 47 | metagrammar = grammar.grammar(subset) 48 | rules = dict(metagrammar) 49 | pygrammar = rules['file_input'] 50 | ## pygrammar([('', 'ENDMARKER',)]) 51 | #. () 52 | 53 | ## grammar.grammar("dotted_name: NAME ('.' NAME)*") 54 | #. (('dotted_name', (NAME (('.' NAME))*)),) 55 | 56 | ## for pair in grammar.grammar('yo: hey boo: yah'): print pair 57 | #. ('yo', hey) 58 | #. ('boo', yah) 59 | 60 | ## for k, v in sorted(rules.items()): print k, v 61 | #. and_expr (shift_expr (('&' shift_expr))*) 62 | #. and_test (not_test (('and' not_test))*) 63 | #. arglist (((argument ','))* ((argument (',')?)|(('*' (test (((',' argument))* ((',' ('**' test)))?)))|('**' test)))) 64 | #. argslist ((NAME (((',' NAME))* ((',' ((('*' (NAME ((',' ('**' NAME)))?))|('**' NAME)))?))?))|(('*' (NAME ((',' ('**' NAME)))?))|('**' NAME))) 65 | #. argument ((test (comp_for)?)|(test ('=' test))) 66 | #. arith_expr (term ((('+'|'-') term))*) 67 | #. assert_stmt ('assert' (test ((',' test))?)) 68 | #. atom (('[' ((testlist_comp)? ']'))|(NAME|(NUMBER|((STRING)+|('None'|('True'|'False')))))) 69 | #. classdef ('class' (NAME ((('(' ((arglist)? ')')))? (':' suite)))) 70 | #. comp_for ('for' (exprlist ('in' (or_test (comp_iter)?)))) 71 | #. comp_if ('if' (test_nocond (comp_iter)?)) 72 | #. comp_iter (comp_for|comp_if) 73 | #. comp_op ('<'|('>'|('=='|('>='|('<='|('!='|('in'|(('not' 'in')|('is'|('is' 'not')))))))))) 74 | #. comparison (expr ((comp_op expr))*) 75 | #. 
compound_stmt (if_stmt|(while_stmt|(for_stmt|(funcdef|(classdef|decorated))))) 76 | #. decorated ((decorator)+ funcdef) 77 | #. decorator ('@' (dotted_name ((('(' ((arglist)? ')')))? NEWLINE))) 78 | #. dotted_as_name (dotted_name (('as' NAME))?) 79 | #. dotted_as_names (dotted_as_name ((',' dotted_as_name))*) 80 | #. dotted_name (NAME (('.' NAME))*) 81 | #. expr (xor_expr (('|' xor_expr))*) 82 | #. expr_stmt (testlist_expr (('=' testlist_expr))*) 83 | #. exprlist (expr (((',' expr))* (',')?)) 84 | #. factor ((('+'|('-'|'~')) factor)|power) 85 | #. file_input (((NEWLINE|stmt))* ENDMARKER) 86 | #. flow_stmt (return_stmt|raise_stmt) 87 | #. for_stmt ('for' (exprlist ('in' (testlist (':' suite))))) 88 | #. funcdef ('def' (NAME (parameters (':' suite)))) 89 | #. if_stmt ('if' (test (':' (suite ((('elif' (test (':' suite))))* (('else' (':' suite)))?))))) 90 | #. import_as_name (NAME (('as' NAME))?) 91 | #. import_as_names (import_as_name (((',' import_as_name))* (',')?)) 92 | #. import_from ('from' (((('.')* dotted_name)|('.')+) ('import' (('(' (import_as_names ')'))|import_as_names)))) 93 | #. import_name ('import' dotted_as_names) 94 | #. import_stmt (import_name|import_from) 95 | #. lambdef ('lambda' ((argslist)? (':' test))) 96 | #. lambdef_nocond ('lambda' ((argslist)? (':' test_nocond))) 97 | #. not_test (('not' not_test)|comparison) 98 | #. or_test (and_test (('or' and_test))*) 99 | #. parameters ('(' ((argslist)? ')')) 100 | #. power (atom ((trailer)* (('**' factor))?)) 101 | #. raise_stmt ('raise' (test)?) 102 | #. return_stmt ('return' (testlist)?) 103 | #. shift_expr (arith_expr ((('<<'|'>>') arith_expr))*) 104 | #. simple_stmt (small_stmt (((';' small_stmt))* ((';')? NEWLINE))) 105 | #. small_stmt (expr_stmt|(flow_stmt|(import_stmt|assert_stmt))) 106 | #. stmt (simple_stmt|compound_stmt) 107 | #. subscript test 108 | #. subscriptlist (subscript (((',' subscript))* (',')?)) 109 | #. suite (simple_stmt|(NEWLINE (INDENT ((stmt)+ DEDENT)))) 110 | #. 
term (factor ((('*'|('/'|('%'|'//'))) factor))*) 111 | #. test ((or_test (('if' (or_test ('else' test))))?)|lambdef) 112 | #. test_nocond (or_test|lambdef_nocond) 113 | #. testlist (test (((',' test))* (',')?)) 114 | #. testlist_comp (test (comp_for|(((',' test))* (',')?))) 115 | #. testlist_expr (test (((',' test))* (',')?)) 116 | #. trailer (('(' ((arglist)? ')'))|(('[' (subscriptlist ']'))|('.' NAME))) 117 | #. while_stmt ('while' (test (':' suite))) 118 | #. xor_expr (and_expr (('^' and_expr))*) 119 | -------------------------------------------------------------------------------- /grammar/parsiflage.py: -------------------------------------------------------------------------------- 1 | """ 2 | Let's work out some basic parsing of some productions from the Python grammar. 3 | Start from a py3 port of parts of Parson, adapted to work on tokens 4 | from `tokenize`. 5 | """ 6 | 7 | import sys 8 | import ast 9 | import token as T 10 | from tokenize import tokenize 11 | 12 | # First an annoying necessary hack. Certain of the AST types (the 13 | # 'simple' ones) do not carry source-position attributes: the 14 | # constructors silently drop them. (If this is documented, I missed 15 | # it. I suppose the reason is efficiency; but this position info needs 16 | # to live *somewhere*, and the AST node is its natural home.) For all 17 | # of these types let's define subclasses that do retain these 18 | # attributes. 
position_attributes = dict(_attributes = ('lineno', 'col_offset'))

def position_extend(class_):
    "Return a subclass of class_ that keeps lineno and col_offset."
    return type(class_.__name__, (class_,), position_attributes)
def map_extend(names):
    "Return position-keeping subclasses of the ast classes named in names."
    return [position_extend(getattr(ast, name)) for name in names.split()]

And, Or = map_extend('And Or')
Add, Sub, Mult, Div, Mod, Pow, LShift, RShift, BitOr, BitXor, BitAnd, FloorDiv = \
    map_extend('Add Sub Mult Div Mod Pow LShift RShift BitOr BitXor BitAnd FloorDiv')
Invert, Not, UAdd, USub = \
    map_extend('Invert Not UAdd USub')
Eq, NotEq, Lt, LtE, Gt, GtE, Is, IsNot, In, NotIn = \
    map_extend('Eq NotEq Lt LtE Gt GtE Is IsNot In NotIn')


# OK, back to parsing.

if __name__ == '__main__':
    # XXX temporary hack during development
    import parson3 as P
else:
    from . import parson3 as P

def main(argv):
    "Parse the file named by argv[1] and print its AST."
    filename = argv[1]
    if 0:
        # Development toggle: dump the tokens and every parse result.
        with open(filename, 'rb') as f:
            tokens = list(tokenize(f.readline))
        print_tokens(tokens)
        demo_parse(tokens)
    else:
        with open(filename, 'rb') as f:
            t = parse(f)
        import astpp
        print(astpp.dump(t, include_attributes=True))

class Name(P._Pex):
    "Matches a NAME token that has not been registered as a keyword."
    def __init__(self):
        self.face = 'XXX'
    def run(self, s, far, state):
        i, vals = state
        token = s[i]
        if token.type != T.NAME or token.string in keywords:
            return []
        vals += (token,)
        return [(_step(far, i+1), vals)]

class Tok(P._Pex):
    "Matches a single lexical token of a given kind."
    def __init__(self, kind, literal_string=None, keep=True):
        self.kind = kind
        self.expected = literal_string   # None means any string will do
        self.keep = keep                 # whether to add the token to the values
        self.face = 'XXX'
    def run(self, s, far, state):
        i, vals = state
        token = s[i]
        if token.type != self.kind:
            return []
        if self.expected is not None and token.string != self.expected:
            return []
        if self.keep:
            vals += (token,)
        return [(_step(far, i+1), vals)]

def _step(far, i):
    "Update far with a new position."
    far[0] = max(far[0], i)
    return i

"""
file_input: (NEWLINE | stmt)* ENDMARKER
stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: expr_stmt

compound_stmt: if_stmt
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT

expr_stmt: testlist_expr ('=' testlist_expr)*
testlist_expr: test
test: arith_expr
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom trailer* ('**' factor)?
atom: '(' test ')' | NAME | NUMBER | STRING+ | 'None' | 'True' | 'False'
trailer: '(' [arglist] ')'
arglist: (argument ',')* argument [',']
argument: test ['=' test]
"""

NUMBER = Tok(T.NUMBER)
STRING = Tok(T.STRING)
NAME   = Name()
OP     = lambda s: Tok(T.OP, s)
Punct  = lambda s: Tok(T.OP, s, keep=False)

keywords = set()

def Kwd(s, keep=False):
    "Return a pex for the keyword s, registering s so that NAME rejects it."
    keywords.add(s)
    return Tok(T.NAME, s, keep=keep)

# The semantic values built below are functions of an expression context
# (ast.Load() or ast.Store()): which context a name appears in is known
# only once the enclosing statement has been recognized.

def Subst(string, maker):
    "Match the operator `string`; yield a node-maker positioned at the token."
    return OP(string) >> (lambda t: lambda ctx: maker(lineno=t.start[0], col_offset=t.start[1]))

def wrapping(maker, wrapper):
    "Return a token action building maker(wrapper(token.string)) at the token."
    return lambda t: lambda ctx: maker(wrapper(t.string),
                                       lineno=t.start[0],
                                       col_offset=t.start[1])

def propagating(maker):
    """Return an action combining node functions with maker; the new node
    takes its source location from the first sub-node."""
    def result(node_fn, *node_fns):
        def build(ctx):
            node = node_fn(ctx)
            return ast.copy_location(maker(node, *[n(ctx) for n in node_fns]),
                                     node)
        return build
    result.__name__ = maker.__name__
    return result

def hug(*args):
    "Bundle node functions into one function returning the list of nodes."
    return lambda ctx: [arg(ctx) for arg in args]

def make_module(*stmts):
    m = ast.Module(list(stmts))
    return ast.copy_location(m, stmts[0]) if stmts else m

def make_if(kwd, test, then, *rest):
    """Build an ast.If from the flat values of if_stmt: the keyword token,
    the test, the suite, then any elif triples and a final else suite."""
    # (This'd be simpler with a different form of the grammar.)
    test = test(ast.Load())
    if not rest:         else_ = []
    elif len(rest) == 1: else_ = rest[0]             # a lone 'else' suite
    else:                else_ = [make_if(*rest)]    # 'elif' nests as an If
    return ast.If(test, then, else_,
                  lineno=kwd.start[0],
                  col_offset=kwd.start[1])

def maybe_assignment(*expr_fns):
    "Build an Expr from a single expression, else an Assign to the last one."
    if len(expr_fns) == 1:
        node0 = expr_fns[0](ast.Load())
        stmt = ast.Expr(node0)
    else:
        lhses = [fn(ast.Store()) for fn in expr_fns[:-1]]
        node0 = lhses[0]
        stmt = ast.Assign(lhses, expr_fns[-1](ast.Load()))
    return ast.copy_location(stmt, node0)

def fill_context(ctx):
    return lambda f: f(ctx)

atom = P.delay(lambda:
            Punct('(') + test + Punct(')')
          | NUMBER >> wrapping(ast.Num, ast.literal_eval)
          | STRING.plus() >> (lambda *tokens: lambda ctx: ast.Str(ast.literal_eval(' '.join(t.string for t in tokens)),
                                                                 lineno=tokens[0].start[0],
                                                                 col_offset=tokens[0].start[1]))
          | Tok(T.NAME, 'None')  >> wrapping(ast.NameConstant, lambda s: None)
          | Tok(T.NAME, 'True')  >> wrapping(ast.NameConstant, lambda s: True)
          | Tok(T.NAME, 'False') >> wrapping(ast.NameConstant, lambda s: False)
          | NAME >> (lambda t: lambda ctx: ast.Name(t.string, ctx,
                                                    lineno=t.start[0],
                                                    col_offset=t.start[1]))
    )
arglist = P.delay(lambda:
            (test + Punct(',')).star() + test + Punct(',').maybe())
trailer = (Punct('(') + (arglist.maybe() >> hug) + Punct(')')
           + propagating(lambda f, args: ast.Call(f, args, [], None, None)))
power = P.delay(lambda:
            P.seclude(
                atom + trailer.star() + (Subst('**', Pow) + factor + propagating(ast.BinOp)).maybe()))
factor = P.delay(lambda:
            ( ((  Subst('+', UAdd)
                | Subst('-', USub)
                | Subst('~', Invert)) + factor) >> propagating(ast.UnaryOp))
          | power)
term = P.seclude(
    factor + ((  Subst('*', Mult)
               | Subst('/', Div)
               | Subst('%', Mod)
               | Subst('//', FloorDiv)) + factor + propagating(ast.BinOp)).star())
arith_expr = P.seclude(
    term + ((  Subst('+', Add)
             | Subst('-', Sub)) + term + propagating(ast.BinOp)).star())
test = arith_expr

expr_stmt = P.seclude(
    test + (Punct('=') + test).star()
    + maybe_assignment)

simple_stmt = expr_stmt + Tok(T.NEWLINE, keep=False)

stmt = P.delay(lambda: simple_stmt | compound_stmt)

suite = (
    simple_stmt
    | (Tok(T.NEWLINE, keep=False) + Tok(T.INDENT, keep=False) + stmt.plus() + Tok(T.DEDENT, keep=False))
) >> (lambda *stmts: list(stmts))

if_stmt = P.seclude(
    Kwd('if', keep=True) + test + Punct(':') + suite
    + (Kwd('elif', keep=True) + test + Punct(':') + suite).star()
    + (Kwd('else') + Punct(':') + suite).maybe()
    + make_if
)

compound_stmt = if_stmt

# The token stream from tokenize() starts with an ENCODING token.  Its
# number differs between Python releases, so take it from the tokenize
# module rather than hard-coding it.
from tokenize import ENCODING

file_input = (Tok(ENCODING, keep=False)
              + (Tok(T.NEWLINE, keep=False) | stmt).star()
              + Tok(T.ENDMARKER, keep=False)) >> make_module

top = file_input

def parse(f):
    """Parse the binary file object f and return an ast.Module.

    Returns None when the grammar does not match; a match that stops short
    of the last token fails an assertion.
    """
    tokens = list(tokenize(f.readline))
    far = [0]
    for i, vals in top.run(tokens, far, (0, ())):
        assert i == len(tokens), "not full parse: %d of %r" % (i, tokens)
        assert len(vals) == 1
        return vals[0]

def demo_parse(tokens):
    "Print every parse of tokens, and how far the parser got."
    far = [0]
    for i, vals in top.run(tokens, far, (0, ())):
        print(i, tokens[i:])
        print('vals', vals)
        try:
            import astpp
        except ImportError:
            continue
        for tree in vals:
            print(tree)
            print(astpp.dump(tree, include_attributes=True))
    print('far', far[0])

def print_tokens(tokens):
    for t in tokens:
        skim_token(t)

def skim_token(t):
    "Print one line summarizing token t."
    print(T.tok_name[t.type], T.tok_name[t.exact_type], t.string)

def print_token(t):
    "Print the fields of token t, one per line."
    print('line', t.line)
    print('start', t.start)
    print('end', t.end)
    print('string', t.string)
    print('type', t.type, T.tok_name[t.type])
    print('exact_type', t.exact_type, T.tok_name[t.exact_type])
    print()

if __name__ == '__main__':
    main(sys.argv)
T.tok_name[t.exact_type]: 266 | print(T.tok_name[t.type], t.string) 267 | else: 268 | print(T.tok_name[t.type], T.tok_name[t.exact_type], t.string) 269 | 270 | def print_token(t): 271 | # print(t.count) 272 | # print(t.index) 273 | # print() 274 | print('line', t.line) 275 | print('start', t.start) 276 | print('end', t.end) 277 | print('string', t.string) 278 | print('type', t.type, T.tok_name[t.type]) 279 | print('exact_type', t.exact_type, T.tok_name[t.exact_type]) 280 | print() 281 | 282 | if __name__ == '__main__': 283 | main(sys.argv) 284 | -------------------------------------------------------------------------------- /grammar/parson3.py: -------------------------------------------------------------------------------- 1 | """ 2 | A port of the core of Parson to py3 3 | """ 4 | 5 | def maybe(p): 6 | "Return a pex matching 0 or 1 of what p matches." 7 | return label(either(p, empty), 8 | '(%r)?', p) 9 | 10 | def plus(p): 11 | "Return a pex matching 1 or more of what p matches." 12 | return label(chain(p, star(p)), 13 | '(%r)+', p) 14 | 15 | def star(p): 16 | "Return a pex matching 0 or more of what p matches." 17 | return label(recur(lambda p_star: maybe(chain(p, p_star))), 18 | '(%r)*', p) 19 | 20 | def invert(p): 21 | "Return a pex that succeeds just when p fails." 22 | return _Pex(('~(%r)', p), 23 | lambda s, far, st: [] if p.run(s, [0], st) else [st]) 24 | 25 | def Pex(x): 26 | if isinstance(x, _Pex): return x 27 | if callable(x): return feed(x) 28 | assert False 29 | 30 | def trace(message): 31 | "A peg that succeeds, and says so." 32 | # TODO: better debugging means 33 | def tracer(s, far, state): 34 | (i, vals) = state 35 | print(message, i, vals) 36 | return [(i, vals)] 37 | return _Pex('trace', tracer) 38 | 39 | class _Pex: 40 | "A parsing expression." 
41 | def __init__(self, face, run): 42 | self.face = face 43 | self.run = run 44 | def __repr__(self): 45 | if isinstance(self.face, str): return self.face 46 | if isinstance(self.face, tuple): return self.face[0] % self.face[1:] 47 | assert False, "Bad face" 48 | def __call__(self, sequence): 49 | """Parse a prefix of sequence and return a tuple of values, or 50 | raise Unparsable.""" 51 | far = [0] 52 | for _, vals in self.run(sequence, far, (0, ())): 53 | return vals 54 | raise Unparsable(self, sequence[:far[0]], sequence[far[0]:]) 55 | # XXX consider using different operators, for clearer precedence at least 56 | def __add__(self, other): return chain(self, Pex(other)) 57 | def __radd__(self, other): return chain(Pex(other), self) 58 | def __or__(self, other): return either(self, Pex(other)) 59 | def __ror__(self, other): return either(Pex(other), self) 60 | def __rshift__(self, fn): return label(seclude(chain(self, Pex(fn))), 61 | '(%r>>%s)', self, _fn_name(fn)) 62 | __invert__ = invert 63 | maybe = maybe 64 | plus = plus 65 | star = star 66 | 67 | class Unparsable(Exception): 68 | "A parsing failure." 69 | @property 70 | def position(self): 71 | "The rightmost position positively reached in the parse attempt." 72 | return len(self.args[1]) 73 | @property 74 | def failure(self): # XXX rename? 75 | "Return slices of the input before and after the parse failure." 76 | return self.args[1], self.args[2] 77 | 78 | def label(p, string, *args): 79 | """Return an equivalent pex whose repr is (string % args), or just 80 | string if no args.""" 81 | assert isinstance(p, _Pex) 82 | return _Pex(((string,) + args if args else string), 83 | p.run) 84 | 85 | def recur(fn): 86 | "Return a pex p such that p = fn(p). This is like the Y combinator." 
87 | p = delay(lambda: fn(p), 'recur(%s)', _fn_name(fn)) 88 | return p 89 | 90 | def _fn_name(fn): 91 | return fn.__name__ if hasattr(fn, '__name__') else repr(fn) 92 | 93 | def delay(thunk, *face): # XXX document face 94 | """Precondition: thunk() will return a pex p. We immediately 95 | return a pex q equivalent to that future p, but we'll call thunk() 96 | only once, and not until the first use of q. Use this for 97 | recursive grammars.""" 98 | def run(s, far, st): 99 | q.run = Pex(thunk()).run 100 | return q.run(s, far, st) 101 | q = _Pex(face or ('delay(%s)', _fn_name(thunk)), 102 | run) 103 | return q 104 | 105 | # TODO: need doc comments or something 106 | fail = _Pex('fail', lambda s, far, st: []) 107 | empty = label(~fail, 'empty') 108 | 109 | def seclude(p): 110 | """Return a pex like p, but where p doesn't get to see or alter 111 | the incoming values tuple.""" 112 | def run(s, far, state): 113 | i, vals = state 114 | return [(i2, vals + vals2) 115 | for i2, vals2 in p.run(s, far, (i, ()))] 116 | return _Pex(('[%r]', p), run) 117 | 118 | def either(p, q): 119 | """Return a pex that succeeds just when one of p or q does, trying 120 | them in that order.""" 121 | return _Pex(('(%r|%r)', p, q), 122 | lambda s, far, st: 123 | p.run(s, far, st) or q.run(s, far, st)) 124 | 125 | def chain(p, q): 126 | """Return a pex that succeeds when p and q both do, with q 127 | starting where p left off.""" 128 | return _Pex(('(%r %r)', p, q), 129 | lambda s, far, st: 130 | [st3 131 | for st2 in p.run(s, far, st) 132 | for st3 in q.run(s, far, st2)]) 133 | 134 | def alter(fn): # XXX better name 135 | """Return a pex that always succeeds, changing the values tuple 136 | from xs to fn(*xs).""" 137 | def run(s, far, state): 138 | i, vals = state 139 | return [(i, fn(*vals))] # XXX check that result is tuple? 
def feed(fn):
    """Return a pex that always succeeds, replacing the values tuple
    xs by the one-element tuple (fn(*xs),). (We're feeding fn with the
    values.) Its repr is ':' followed by fn's name."""
    # alter() expects a function returning a tuple, so box fn's result.
    boxed = alter(lambda *vals: (fn(*vals),))
    return label(boxed, ':%s', _fn_name(fn))
import_as_name)* [','] 42 | dotted_as_names: dotted_as_name (',' dotted_as_name)* 43 | import_as_name: NAME ['as' NAME] 44 | dotted_as_name: dotted_name ['as' NAME] 45 | dotted_name: NAME ('.' NAME)* 46 | 47 | assert_stmt: 'assert' test [',' test] 48 | 49 | test: or_test ['if' or_test 'else' test] | lambdef 50 | test_nocond: or_test | lambdef_nocond 51 | 52 | lambdef: 'lambda' [argslist] ':' test 53 | lambdef_nocond: 'lambda' [argslist] ':' test_nocond 54 | 55 | or_test: and_test ('or' and_test)* 56 | and_test: not_test ('and' not_test)* 57 | not_test: 'not' not_test | comparison 58 | 59 | comparison: expr (comp_op expr)* 60 | comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'|'is' 'not' 61 | 62 | expr: xor_expr ('|' xor_expr)* 63 | xor_expr: and_expr ('^' and_expr)* 64 | and_expr: shift_expr ('&' shift_expr)* 65 | shift_expr: arith_expr (('<<'|'>>') arith_expr)* 66 | arith_expr: term (('+'|'-') term)* 67 | term: factor (('*'|'/'|'%'|'//') factor)* 68 | factor: ('+'|'-'|'~') factor | power 69 | power: atom trailer* ['**' factor] 70 | 71 | atom: ('(' [test] ')' | 72 | '[' [testlist_comp] ']' | 73 | NAME | NUMBER | STRING+ | 'None' | 'True' | 'False') 74 | 75 | trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME 76 | 77 | subscriptlist: subscript (',' subscript)* [','] 78 | subscript: test 79 | 80 | testlist_comp: test ( comp_for | (',' test)* [','] ) 81 | comp_for: 'for' exprlist 'in' or_test [comp_iter] 82 | comp_iter: comp_for | comp_if 83 | comp_if: 'if' test_nocond [comp_iter] 84 | 85 | exprlist: expr (',' expr)* [','] 86 | testlist: test (',' test)* [','] 87 | -------------------------------------------------------------------------------- /grammar/t: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail; shopt -s failglob # 'Bash strict mode' 3 | 4 | # Test suite for the parser. (Crude!) 
#!/bin/bash
set -euo pipefail; shopt -s failglob  # 'Bash strict mode'

# Parse one source file and compare the result with the recorded reference.
# Succeeds only if the parser succeeds and its output matches ref/<file>.
# This uses out/ and ref/ directories which aren't checked in to the repo.

src="$1"

python3 parsiflage.py "$src" >out/"$src"

# Given a file and a directory, diff compares the file with the
# same-named file inside the directory, i.e. ref/<file> vs out/<file>.
diff -u ref/"$src" out/
def read_file(filename):
    """Return the whole contents of the file named `filename` as a string.

    The file is opened in default text mode. It is closed even when
    reading raises (the previous open/read/close sequence leaked the
    handle in that case)."""
    with open(filename) as f:
        return f.read()
== '__main__': 39 | sys.argv.pop(0) 40 | meta_loader.load_file(sys.argv[0], '__main__') 41 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/darius/tailbiter/2c4f0365b720c39495fabcca9a60899ac0a7df96/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_basic.py: -------------------------------------------------------------------------------- 1 | """Basic tests for tailbiter.""" 2 | 3 | from . import vmtest 4 | 5 | class TestIt(vmtest.VmTestCase): 6 | def test_constant(self): 7 | self.assert_ok("17") 8 | 9 | def test_for_loop(self): 10 | self.assert_ok("""\ 11 | out = "" 12 | for i in range(5): 13 | out = out + str(i) 14 | print(out) 15 | """) 16 | 17 | def test_building_stuff(self): 18 | self.assert_ok("""\ 19 | print((1+1, 2+2, 3+3)) 20 | """) 21 | self.assert_ok("""\ 22 | print([1+1, 2+2, 3+3]) 23 | """) 24 | self.assert_ok("""\ 25 | print({1:1+1, 2:2+2, 3:3+3}) 26 | """) 27 | 28 | def test_subscripting(self): 29 | self.assert_ok("""\ 30 | l = list(range(10)) 31 | print("%s %s %s" % (l[0], l[3], l[9])) 32 | """) 33 | self.assert_ok("""\ 34 | l = list(range(10)) 35 | l[5] = 17 36 | print(l) 37 | """) 38 | 39 | def test_list_comprehension(self): 40 | self.assert_ok("""\ 41 | x = [z*z for z in range(5)] 42 | assert x == [0, 1, 4, 9, 16] 43 | """) 44 | 45 | def test_unary_operators(self): 46 | self.assert_ok("""\ 47 | x = 8 48 | print(-x, ~x, not x) 49 | """) 50 | 51 | def test_attributes(self): 52 | self.assert_ok("""\ 53 | l = lambda: 1 # Just to have an object... 
54 | l.foo = 17 55 | print(hasattr(l, "foo"), l.foo) 56 | """) 57 | 58 | def test_import(self): 59 | self.assert_ok("""\ 60 | import math 61 | print(math.pi, math.e) 62 | from math import sqrt 63 | print(sqrt(2)) 64 | """) 65 | 66 | def test_classes(self): 67 | self.assert_ok("""\ 68 | class Thing(object): 69 | def __init__(self, x): 70 | self.x = x 71 | def meth(self, y): 72 | return self.x * y 73 | thing1 = Thing(2) 74 | thing2 = Thing(3) 75 | print(thing1.x, thing2.x) 76 | print(thing1.meth(4), thing2.meth(5)) 77 | """) 78 | 79 | def test_calling_methods_wrong(self): 80 | self.assert_ok("""\ 81 | class Thing(object): 82 | def __init__(self, x): 83 | self.x = x 84 | def meth(self, y): 85 | return self.x * y 86 | thing1 = Thing(2) 87 | print(Thing.meth(14)) 88 | """, raises=TypeError) 89 | 90 | def test_calling_subclass_methods(self): 91 | self.assert_ok("""\ 92 | class Thing(object): 93 | def foo(self): 94 | return 17 95 | 96 | class SubThing(Thing): 97 | pass 98 | 99 | st = SubThing() 100 | print(st.foo()) 101 | """) 102 | 103 | def test_subclass_attribute(self): 104 | self.assert_ok("""\ 105 | class Thing(object): 106 | def __init__(self): 107 | self.foo = 17 108 | class SubThing(Thing): 109 | pass 110 | st = SubThing() 111 | print(st.foo) 112 | """) 113 | 114 | def test_subclass_attributes_not_shared(self): 115 | self.assert_ok("""\ 116 | class Thing(object): 117 | foo = 17 118 | class SubThing(Thing): 119 | foo = 25 120 | st = SubThing() 121 | t = Thing() 122 | assert st.foo == 25 123 | assert t.foo == 17 124 | """) 125 | 126 | def test_object_attrs_not_shared_with_class(self): 127 | self.assert_ok("""\ 128 | class Thing(object): 129 | pass 130 | t = Thing() 131 | t.foo = 1 132 | Thing.foo""", raises=AttributeError) 133 | 134 | def test_data_descriptors_precede_instance_attributes(self): 135 | self.assert_ok("""\ 136 | class Foo(object): 137 | pass 138 | f = Foo() 139 | f.des = 3 140 | class Descr(object): 141 | def __get__(self, obj, cls): 142 | return 2 143 
| def __set__(self, obj, val): 144 | raise NotImplementedError 145 | Foo.des = Descr() 146 | assert f.des == 2 147 | """) 148 | 149 | def test_instance_attrs_precede_non_data_descriptors(self): 150 | self.assert_ok("""\ 151 | class Foo(object): 152 | pass 153 | f = Foo() 154 | f.des = 3 155 | class Descr(object): 156 | def __get__(self, obj, cls): 157 | return 2 158 | Foo.des = Descr() 159 | assert f.des == 3 160 | """) 161 | 162 | def test_subclass_attributes_dynamic(self): 163 | self.assert_ok("""\ 164 | class Foo(object): 165 | pass 166 | class Bar(Foo): 167 | pass 168 | b = Bar() 169 | Foo.baz = 3 170 | assert b.baz == 3 171 | """) 172 | 173 | def test_attribute_access(self): 174 | self.assert_ok("""\ 175 | class Thing(object): 176 | z = 17 177 | def __init__(self): 178 | self.x = 23 179 | t = Thing() 180 | print(Thing.z) 181 | print(t.z) 182 | print(t.x) 183 | """) 184 | 185 | self.assert_ok("""\ 186 | class Thing(object): 187 | z = 17 188 | def __init__(self): 189 | self.x = 23 190 | t = Thing() 191 | print(t.xyzzy) 192 | """, raises=AttributeError) 193 | 194 | def test_staticmethods(self): 195 | self.assert_ok("""\ 196 | class Thing(object): 197 | @staticmethod 198 | def smeth(x): 199 | print(x) 200 | @classmethod 201 | def cmeth(cls, x): 202 | print(x) 203 | 204 | Thing.smeth(1492) 205 | Thing.cmeth(1776) 206 | """) 207 | 208 | def test_unbound_methods(self): 209 | self.assert_ok("""\ 210 | class Thing(object): 211 | def meth(self, x): 212 | print(x) 213 | m = Thing.meth 214 | m(Thing(), 1815) 215 | """) 216 | 217 | def test_bound_methods(self): 218 | self.assert_ok("""\ 219 | class Thing(object): 220 | def meth(self, x): 221 | print(x) 222 | t = Thing() 223 | m = t.meth 224 | m(1815) 225 | """) 226 | 227 | def test_callback(self): 228 | self.assert_ok("""\ 229 | def lcase(s): 230 | return s.lower() 231 | l = ["xyz", "ABC"] 232 | l.sort(key=lcase) 233 | print(l) 234 | assert l == ["ABC", "xyz"] 235 | """) 236 | 237 | def test_unpacking(self): 238 | 
self.assert_ok("""\ 239 | a, b, c = (1, 2, 3) 240 | assert a == 1 241 | assert b == 2 242 | assert c == 3 243 | """) 244 | 245 | def test_exec_statement(self): 246 | self.assert_ok("""\ 247 | g = {} 248 | exec("a = 11", g, g) 249 | assert g['a'] == 11 250 | """) 251 | 252 | def test_jump_if_true_or_pop(self): 253 | self.assert_ok("""\ 254 | def f(a, b): 255 | return a or b 256 | assert f(17, 0) == 17 257 | assert f(0, 23) == 23 258 | assert f(0, "") == "" 259 | """) 260 | 261 | def test_jump_if_false_or_pop(self): 262 | self.assert_ok("""\ 263 | def f(a, b): 264 | return not(a and b) 265 | assert f(17, 0) is True 266 | assert f(0, 23) is True 267 | assert f(0, "") is True 268 | assert f(17, 23) is False 269 | """) 270 | 271 | def test_pop_jump_if_true(self): 272 | self.assert_ok("""\ 273 | def f(a): 274 | if not a: 275 | return 'foo' 276 | else: 277 | return 'bar' 278 | assert f(0) == 'foo' 279 | assert f(1) == 'bar' 280 | """) 281 | 282 | def test_decorator(self): 283 | self.assert_ok("""\ 284 | def verbose(func): 285 | def _wrapper(a, b): 286 | return func(a, b) 287 | return _wrapper 288 | 289 | @verbose 290 | def add(x, y): 291 | return x+y 292 | 293 | add(7, 3) 294 | """) 295 | 296 | def test_multiple_classes(self): 297 | # Making classes used to mix together all the class-scoped values 298 | # across classes. This test would fail because A.__init__ would be 299 | # over-written with B.__init__, and A(1, 2, 3) would complain about 300 | # too many arguments. 
class TestComparisons(vmtest.VmTestCase):
    # Each test hands a small program to assert_ok.

    def test_in(self):
        # Membership on strings and tuples, both `in` and `not in`.
        program = """\
            assert "x" in "xyz"
            assert "x" not in "abc"
            assert "x" in ("x", "y", "z")
            assert "x" not in ("a", "b", "c")
            """
        self.assert_ok(program)

    def test_less(self):
        # `<` and `<=` on ints and strings, including the equal case.
        program = """\
            assert 1 < 3
            assert 1 <= 2 and 1 <= 1
            assert "a" < "b"
            assert "a" <= "b" and "a" <= "a"
            """
        self.assert_ok(program)

    def test_greater(self):
        # `>` and `>=` on ints and strings, including the equal case.
        program = """\
            assert 3 > 1
            assert 3 >= 1 and 3 >= 3
            assert "z" > "a"
            assert "z" >= "a" and "z" >= "z"
            """
        self.assert_ok(program)
class TestExceptions(vmtest.VmTestCase):
    # Each test hands assert_ok a program plus the exception type it
    # is expected to raise.

    def test_raise_exception(self):
        # Raising an exception instance.
        program = "raise Exception('oops')"
        self.assert_ok(program, raises=Exception)

    def test_raise_exception_class(self):
        # Raising a bare exception class.
        program = "raise ValueError"
        self.assert_ok(program, raises=ValueError)

    def test_local_name_error(self):
        # Referencing an undefined name inside a function body.
        program = """\
            def fn():
                fooey
            fn()
            """
        self.assert_ok(program, raises=NameError)
class TestClosures(vmtest.VmTestCase):
    # Each test hands a small closure-using program to assert_ok.
    # Methods prefixed XXX_ are not picked up by the unittest loader.

    def test_closures(self):
        # An inner function reading a parameter of its enclosing function.
        program = """\
            def make_adder(x):
                def add(y):
                    return x+y
                return add
            a = make_adder(10)
            print(a(7))
            assert a(7) == 17
            """
        self.assert_ok(program)

    def test_closures_store_deref(self):
        # The captured variable z is assigned in the enclosing body.
        program = """\
            def make_adder(x):
                z = x+1
                def add(y):
                    return x+y+z
                return add
            a = make_adder(10)
            print(a(7))
            assert a(7) == 28
            """
        self.assert_ok(program)

    def test_closures_in_loop(self):
        # Each lambda captures its own i via the immediately-called wrapper.
        program = """\
            def make_fns(x):
                fns = []
                for i in range(x):
                    fns.append((lambda i: lambda: i)(i))
                return fns
            fns = make_fns(3)
            for f in fns:
                print(f())
            assert (fns[0](), fns[1](), fns[2]()) == (0, 1, 2)
            """
        self.assert_ok(program)

    def XXX_test_closures_with_defaults(self):
        # Closures combined with default argument values.
        program = """\
            def make_adder(x, y=13, z=43):
                def add(q, r=11):
                    return x+y+z+q+r
                return add
            a = make_adder(10, 17)
            print(a(7))
            assert a(7) == 88
            """
        self.assert_ok(program)

    def test_deep_closures(self):
        # Variables captured across four levels of nesting.
        program = """\
            def f1(a):
                b = 2*a
                def f2(c):
                    d = 2*c
                    def f3(e):
                        f = 2*e
                        def f4(g):
                            h = 2*g
                            return a+b+c+d+e+f+g+h
                        return f4
                    return f3
                return f2
            answer = f1(3)(4)(5)(6)
            print(answer)
            assert answer == 54
            """
        self.assert_ok(program)

    def test_closure_vars_from_static_parent(self):
        # h must see f's xs, not the xs of the function that calls it.
        program = """\
            def f(xs):
                return lambda: xs[0]

            def g(h):
                xs = 5
                lambda: xs
                return h()

            assert g(f([42])) == 42
            """
        self.assert_ok(program)

    def test_scope_analysis_of_varargs(self):
        # A *args parameter captured by an inner lambda.
        program = """\
            def f(*xs):
                return lambda: xs[0]
            print(f(137)())
            """
        self.assert_ok(program)

    def test_scope_analysis_of_varkw(self):
        # A **kwargs parameter captured by an inner lambda.
        program = """\
            def f(**kws):
                return lambda: kws['y']
            print(f(y=183)())
            """
        self.assert_ok(program)
class VmTestCase(unittest.TestCase):
    """Base class for tests comparing our VM and compiler with real Python."""

    def assert_ok(self, source_code, raises=None):
        """Run `source_code` in our VM and in real Python: they behave the same.

        Three configurations are each compared with real Python:
          1. CPython-compiled code run in our VM;
          2. tailbiter-compiled code run in our VM;
          3. code compiled by tailbiter itself running in our VM, then
             run in our VM.

        `raises`, if given, is the exception type the program must end with;
        otherwise the program must finish without an exception.
        """
        source_code = textwrap.dedent(source_code)
        filename = "<%s>" % self.id()

        ref_code = compile(source_code, filename, "exec", 0, 1)

        # Print the disassembly so we'll see it if the test fails.
        if 0: dis_code(ref_code)

        # Run the code through our VM and the real Python interpreter, for comparison.
        vm_result = self.run_in_vm(ref_code)
        py_result = self.run_in_real_python(ref_code)
        self._assert_matches_python(vm_result, py_result, raises)

        # Same thing for tailbiter-compiled code run in byterun.
        tb_code = compiler.code_for_module(filename, filename, ast.parse(source_code))

        if ref_code.co_stacksize != tb_code.co_stacksize:
            message = "Different stacksize: ref %d, tb %d" % (ref_code.co_stacksize,
                                                              tb_code.co_stacksize)
            print(message)
            print(source_code)
            self.fail(message)

        if 0: dis_code(tb_code)

        tb_result = self.run_in_vm(tb_code)
        self._assert_matches_python(tb_result, py_result, raises)

        # And the same again but with the compiler also running in the vm.
        # Run the code that compiler produced: previously ref_code was run
        # a second time here and both_code was never executed.
        both_code = self.run_compiler_in_vm(source_code)
        both_result = self.run_in_vm(both_code)
        self._assert_matches_python(both_result, py_result, raises)

    def _assert_matches_python(self, result, py_result, raises):
        """Check one (value, exc, stdout) result against real Python's."""
        value, exc, stdout = result
        py_value, py_exc, py_stdout = py_result

        self.assert_same_exception(exc, py_exc)
        self.assertEqual(stdout.getvalue(), py_stdout.getvalue())
        self.assertEqual(value, py_value)
        if raises:
            self.assertIsInstance(exc, raises)
        else:
            self.assertIsNone(exc)

    def run_compiler_in_vm(self, source_code):
        "Run tailbiter on vm, compiling source_code."
        source_code = textwrap.dedent(source_code)
        source_ast = ast.parse(source_code)
        module_name = filename = "<%s>" % self.id()

        # 1. Make a compiler2 module, which is compiler compiled by itself
        #    with the resulting code run in vm.
        compiler2 = self.get_meta_compiler()

        # 2. Compile source_code by running compiler2 in the vm.
        return compiler2.code_for_module(module_name, filename, source_ast)

    def get_meta_compiler(self):
        """Return the compiler module as compiled by itself and run in the vm.

        The module is built once and cached on VmTestCase.
        """
        if not hasattr(VmTestCase, 'meta_compiler'):
            # Locate the source through the imported module so this no
            # longer depends on the current directory; fall back to the
            # old relative path if the module has no __file__.
            compiler_path = getattr(compiler, '__file__', None) or 'compiler.py'
            with open(compiler_path) as f:
                compiler_source = f.read()
            compiler_ast = ast.parse(compiler_source)
            compiler_code = compiler.code_for_module('compiler',
                                                     'compiler.py',
                                                     compiler_ast)
            VmTestCase.meta_compiler = types.ModuleType('compiler2')
            run(compiler_code, VmTestCase.meta_compiler.__dict__, None)
        return VmTestCase.meta_compiler

    def run_in_vm(self, code):
        """Run `code` in our VM; return (value, exception or None, stdout buffer)."""
        real_stdout = sys.stdout

        # Run the code through our VM.

        vm_stdout = io.StringIO()
        if CAPTURE_STDOUT:              # pragma: no branch
            sys.stdout = vm_stdout

        vm_value = vm_exc = None
        try:
            vm_value = run(code, None, None)
        except VirtualMachineError:     # pragma: no cover
            # If the VM code raises an error, show it.
            raise
        except AssertionError:          # pragma: no cover
            # If test code fails an assert, show it.
            raise
        except Exception as e:
            # Otherwise, keep the exception for comparison later.
            if not CAPTURE_EXCEPTION:   # pragma: no cover
                raise
            vm_exc = e
        finally:
            # Always restore stdout, then echo what the program printed so
            # it is visible in the test log.
            sys.stdout = real_stdout
            real_stdout.write("-- stdout ----------\n")
            real_stdout.write(vm_stdout.getvalue())

        return vm_value, vm_exc, vm_stdout

    def run_in_real_python(self, code):
        """Run `code` with eval(); return (value, exception or None, stdout buffer)."""
        real_stdout = sys.stdout

        py_stdout = io.StringIO()
        sys.stdout = py_stdout

        py_value = py_exc = None
        globs = {
            '__builtins__': __builtins__,
            '__name__': '__main__',
            '__doc__': None,
            '__package__': None,
        }

        try:
            py_value = eval(code, globs, globs)
        except AssertionError:          # pragma: no cover
            raise
        except Exception as e:
            py_exc = e
        finally:
            sys.stdout = real_stdout

        return py_value, py_exc, py_stdout

    def assert_same_exception(self, e1, e2):
        """Exceptions don't implement __eq__, check it ourselves."""
        self.assertEqual(str(e1), str(e2))
        self.assertIs(type(e1), type(e2))