├── examples ├── hello_world.pyasm ├── labels.pyasm └── extended_code.pyasm ├── README.md ├── makepy ├── LICENSE ├── dispy.py └── pyasm.py /examples/hello_world.pyasm: -------------------------------------------------------------------------------- 1 | code 2 | consts 2 3 | string "Hello world!" 4 | none 5 | end 6 | instructions 7 | LOAD_CONST 0 8 | PRINT_ITEM 9 | PRINT_NEWLINE 10 | LOAD_CONST 1 11 | RETURN_VALUE 12 | end 13 | end 14 | -------------------------------------------------------------------------------- /examples/labels.pyasm: -------------------------------------------------------------------------------- 1 | code 2 | consts 1 3 | string "Labels are fun!" 4 | end 5 | instructions 6 | print_label: 7 | LOAD_CONST 0 8 | PRINT_ITEM 9 | PRINT_NEWLINE 10 | JUMP_ABSOLUTE print_label 11 | end 12 | end 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pyasm 2 | Assembler and disassembler for compiled Python 3 | 4 | ### makepy 5 | Builds a pyasm file into a pyc file 6 | ``` 7 | ./makepy hello_world.pyasm 8 | python hello_world.pyc 9 | ``` 10 | 11 | ## dispy.py 12 | Disassembles a pyc file into a pyasm file 13 | ``` 14 | ./dispy.py hello_world.pyc 15 | ``` 16 | -------------------------------------------------------------------------------- /makepy: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import marshal 4 | import sys 5 | import pyasm 6 | from py_compile import MAGIC 7 | 8 | if len(sys.argv) != 2: 9 | print "Incorrect number of arguments!" 10 | exit() 11 | 12 | c = pyasm.parse_file(sys.argv[1]) 13 | out_name = sys.argv[1].split('/')[-1].split('.')[0] + '.pyc' 14 | o = open(out_name, 'wb') 15 | o.write(MAGIC) 16 | o.write('\x00\x00\x00\x00') 17 | o.write(marshal.dumps(c)) 18 | o.close() 19 | -------------------------------------------------------------------------------- /examples/extended_code.pyasm: -------------------------------------------------------------------------------- 1 | code 2 | consts 0x9102 3 | 0x9100 string "jello world" 4 | none 5 | end 6 | instructions 7 | EXTENDED_ARG 8 | LOAD_CONST 0x9100 9 | NOP 10 | JUMP_FORWARD 0x191 11 | NOP 12 | 133 * EXTENDED_ARG 0xFFFF 13 | EXTENDED_ARG 14 | PRINT_ITEM 15 | JUMP_FORWARD 0x191 16 | NOP 17 | 133 * EXTENDED_ARG 0xFFFF 18 | EXTENDED_ARG 19 | PRINT_NEWLINE 20 | JUMP_FORWARD 0x191 21 | NOP 22 | 133 * EXTENDED_ARG 0xFFFF 23 | EXTENDED_ARG 24 | LOAD_CONST 0x9101 25 | RETURN_VALUE 26 | NOP 27 | EXTENDED_ARG 0xFFFF 28 | JUMP_FORWARD 0xFB33 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Gabe Kirkpatrick 2 | 3 | Permission to use, copy, modify, and/or distribute this software for any 4 | purpose with or without fee is hereby granted, provided that the above 5 | copyright notice and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 | 15 | -------------------------------------------------------------------------------- /dispy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | from dis import opname, opmap 4 | import marshal 5 | import sys 6 | import types 7 | 8 | def disassemble(s): 9 | i = 0 10 | #r = '' 11 | o = [] 12 | dupe_count = 1 13 | while i < len(s.co_code): 14 | op = ord(s.co_code[i]) 15 | r = opname[op] + ' ' 16 | if op >= 90: # these are the ones with args 17 | oparg = ord(s.co_code[i+1]) | (ord(s.co_code[i+2]) << 8) 18 | r += int_to_str(oparg) 19 | i += 2 20 | i += 1 21 | comment = generate_autocomment(s, op, oparg) 22 | if comment: 23 | r += " # " + comment 24 | if len(o) > 0 and (r == o[-1] or r == o[-1].split(' ', 2)[-1]): 25 | dupe_count += 1 26 | o[-1] = int_to_str(dupe_count) + " * " + r 27 | continue 28 | else: 29 | dupe_count = 1 30 | o.append(r) 31 | return o 32 | 33 | def disassemble_file(filename): 34 | o = open(filename, 'rb') 35 | o.read(8) 36 | c = marshal.load(o) 37 | o.close() 38 | f = open(filename.split('.')[0] + '.pyasm', 'w') 39 | write_object(f, c, 0, 1) 40 | f.close() 41 | 42 | def write_object(f, o, indents, count): 43 | f.write('\t' * indents) 44 | if count > 1: 45 | f.write(int_to_str(count) + " * ") 46 | if type(o) == types.CodeType: 47 | f.write('code ') 48 | write_code(f, o, indents) 49 | elif type(o) == types.StringType: 50 | f.write('string ') 51 | write_string(f, o) 52 | elif type(o) == types.IntType: 53 | f.write('int ') 54 | write_int(f, o) 55 | elif type(o) == types.ListType: 56 | f.write('list ') 57 | write_list(f, o, indents) 58 | elif type(o) == types.TupleType: 59 | f.write('tuple ') 60 | write_list(f, o, indents) 61 | elif type(o) == types.NoneType: 62 | write_none(f, o) 63 | elif type(o) == types.FloatType: 64 | f.write('float ') 65 | write_float(f, o) 66 | 67 | def write_float(f, d): 68 | f.write(str(d) + '\n') 69 | 70 | def write_none(f, s): 71 | f.write('none\n') 72 | 73 | def write_string(f, s): 74 | f.write('"' + s.encode("string-escape").replace('"', '\\"') + '"\n') 75 | 76 | def int_to_str(i): 77 | #if i != 0 and (i & 0xF == 0 or i & 0xF == 0xF): 78 | # return hex(i) 79 | return str(i) 80 | 81 | def write_int(f, i): 82 | f.write(int_to_str(i) + '\n') 83 | 84 | def write_list(f, l, indents): 85 | f.write(str(len(l)) + '\n') 86 | dupe_count = 1 87 | i = 0 88 | while i < len(l): 89 | dupe_count = 1 90 | while i < len(l) - 1 and l[i] == l[i + 1]: 91 | dupe_count += 1 92 | i += 1 93 | 94 | write_object(f, l[i], indents + 1, dupe_count) 95 | i += 1 96 | 97 | f.write(('\t' * indents) + 'end\n') 98 | 99 | def generate_autocomment(c, instruction, oparg): 100 | comment = '' 101 | if instruction == opmap['LOAD_CONST']: 102 | if oparg < len(c.co_consts): 103 | comment = str(c.co_consts[oparg]) 104 | else: 105 | comment = "Error: index outside of consts" 106 | elif instruction == opmap['LOAD_FAST'] or instruction == opmap['STORE_FAST']: 107 | if oparg < len(c.co_varnames): 108 | comment = str(c.co_varnames[oparg]) 109 | else: 110 | comment = "Error: index outside of varnames" 111 | elif instruction == opmap['LOAD_NAME'] or instruction == opmap['STORE_NAME']: 112 | if oparg < len(c.co_names): 113 | comment = str(c.co_names[oparg]) 114 | else: 115 | comment = "Error: index outside of names" 116 | 117 | if len(comment) < 200: 118 | return comment 119 | return None 120 | 121 | def write_code(f, c, indents): 122 | f.write('\n') 123 | if c.co_argcount != 0: 124 | f.write('\t' * (indents + 1)) 125 | f.write('arg_count ' + str(c.co_argcount) + '\n') 126 | if c.co_nlocals != 0: 127 | f.write('\t' * (indents + 1)) 128 | f.write('n_locals ' + str(c.co_nlocals) + '\n') 129 | if c.co_stacksize != 0: 130 | f.write('\t' * (indents + 1)) 131 | f.write('stack_size ' + str(c.co_stacksize) + '\n') 132 | if c.co_flags != 0: 133 | f.write('\t' * (indents + 1)) 134 | f.write('flags ' + str(c.co_flags) + '\n') 135 | 136 | if len(c.co_consts) != 0: 137 | f.write('\t' * (indents + 1)) 138 | f.write('consts ') 139 | write_list(f, c.co_consts, indents + 1) 140 | if len(c.co_names) != 0: 141 | f.write('\t' * (indents + 1)) 142 | f.write('names ') 143 | write_list(f, c.co_names, indents + 1) 144 | if len(c.co_varnames) != 0: 145 | f.write('\t' * (indents + 1)) 146 | f.write('varnames ') 147 | write_list(f, c.co_varnames, indents + 1) 148 | if len(c.co_freevars) != 0: 149 | f.write('\t' * (indents + 1)) 150 | f.write('freevars ') 151 | write_list(f, c.co_freevars, indents + 1) 152 | if len(c.co_cellvars) != 0: 153 | f.write('\t' * (indents + 1)) 154 | f.write('cellvars ') 155 | write_list(f, c.co_cellvars, indents + 1) 156 | 157 | instructions = disassemble(c) 158 | f.write('\t' * (indents + 1)) 159 | f.write('instructions\n') 160 | for i in instructions: 161 | f.write('\t' * (indents + 2)) 162 | f.write(i) 163 | f.write('\n') 164 | 165 | f.write('\t' * (indents + 1)) 166 | f.write('end\n') 167 | f.write('\t' * (indents + 1)) 168 | f.write('name ') 169 | write_string(f, c.co_name) 170 | f.write('\t' * (indents + 1)) 171 | f.write('filename ') 172 | write_string(f, c.co_filename) 173 | f.write('\t' * (indents + 1)) 174 | f.write('lnotab ') 175 | write_string(f, c.co_lnotab) 176 | f.write('\t' * indents) 177 | f.write('end\n') 178 | 179 | if __name__ == "__main__": 180 | disassemble_file(sys.argv[1]) 181 | -------------------------------------------------------------------------------- /pyasm.py: -------------------------------------------------------------------------------- 1 | from dis import opmap 2 | from types import CodeType 3 | from StringIO import StringIO 4 | import dispy 5 | 6 | def exec_input(consts=None, names=None, varnames=None, freevars=None, cellvars=None): 7 | exec compile_input(consts=consts, names=names, varnames=varnames, freevars=freevars, cellvars=cellvars) 8 | 9 | def compile_input(consts=None, names=None, varnames=None, freevars=None, cellvars=None): 10 | out = StringIO() 11 | out.write('code\n') 12 | if consts: 13 | dispy.write_list(out, consts, 1, 'consts') 14 | if names: 15 | dispy.write_list(out, names, 1, 'names') 16 | if varnames: 17 | dispy.write_list(out, varnames, 1, 'varnames') 18 | if freevars: 19 | dispy.write_list(out, freevars, 1, 'freevars') 20 | if cellvars: 21 | dispy.write_list(out, consts, 1, 'cellvars') 22 | while 1: 23 | print('pyasm>'), 24 | line = raw_input() 25 | if line.strip() == '': 26 | break; 27 | out.write(line + '\n') 28 | out.write('end\n') 29 | out.seek(0) 30 | return read_object(out) 31 | 32 | def parse_file(filename): 33 | f = open(filename, 'r') 34 | o = read_object(f) 35 | f.close() 36 | return o 37 | 38 | def parse_int(i): 39 | if i[:2] == '0x': 40 | return int(i, 16) 41 | elif i[:2] == '0b': 42 | return int(i, 2) 43 | return int(i) 44 | 45 | def read_line(f): 46 | return f.readline().strip() 47 | 48 | # l is typically the next line in the stream, but can be overridden in some circumstances 49 | def read_object(f, l=None): 50 | if l == None: 51 | l = read_line(f) 52 | if l.split()[0] == 'string': 53 | return read_string(l) 54 | elif l.split()[0] == 'int' or l.split()[0] == 'int64': 55 | return read_int(l) 56 | elif l.split()[0] == 'float': 57 | return read_float(l) 58 | elif l.split()[0] == 'code': 59 | return read_code(f) 60 | elif l.split()[0] == 'none': 61 | return read_none(l) 62 | elif l.split()[0] == 'list': 63 | return read_list(f, parse_int(l.split()[1])) 64 | elif l.split()[0] == 'tuple': 65 | return tuple(read_list(f, parse_int(l.split()[1]))) 66 | elif l.split()[0] == 'dict': 67 | return read_dict(f, parse_int(l.split()[1])) 68 | elif l.split()[0] == 'include': 69 | return parse_file(read_string(l) + '.pyasm') 70 | 71 | def read_dict(f, count): 72 | r = {} 73 | for i in range(count): 74 | r[read_object(f)] = read_object(f) 75 | return r 76 | 77 | def is_number(s): 78 | if s >= '0' and s <= '9': 79 | return True 80 | return False 81 | 82 | def read_list(f, count): 83 | r = [None] * count 84 | index = 0 85 | l = read_line(f) 86 | while l != 'end': 87 | if is_number(l.split()[0][0]) and l.split()[1] != '*': 88 | index = parse_int(l.split()[0]) 89 | l = l.split(' ', 1)[1] 90 | r[index] = read_object(f, l) 91 | index += 1 92 | elif is_number(l.split()[0][0]): 93 | count = parse_int(l.split()[0]) 94 | l = l.split(' ', 2)[2] 95 | obj = read_object(f, l) 96 | for i in range(count): 97 | r[index] = obj 98 | index += 1 99 | else: 100 | r[index] = read_object(f, l) 101 | index += 1 102 | l = read_line(f) 103 | return r 104 | 105 | def read_string(l): 106 | s = l.split('"', 1)[1] 107 | end_index = 0 108 | escaped = False 109 | for i in range(len(s)): 110 | if escaped: 111 | continue 112 | if s[i] == '"': 113 | end_index = i 114 | break 115 | if s[i] == '\\': 116 | escaped = True 117 | 118 | return s[:i].decode("string-escape") 119 | 120 | def read_int(l): 121 | return parse_int(l.split()[1]) 122 | 123 | def read_float(l): 124 | return float(l.split()[1]) 125 | 126 | def read_none(l): 127 | return None 128 | 129 | def process_labels(f): 130 | labels = {} 131 | l = read_line(f).split('#')[0].strip() 132 | pos = 0 133 | 134 | while l != 'end': 135 | count = 1 136 | instruction_len = 1 137 | if l[-1] == ':': # check for a : to see if the current line is a label 138 | labels[l[:-1]] = pos 139 | count = 0 # not an instruction 140 | elif len(l.split()) > 2: 141 | if l.split()[1] == '*': 142 | count = parse_int(l.split()[0]) 143 | l = l.split(' ', 2)[-1] 144 | 145 | elif len(l.split()) == 2: 146 | instruction_len = 3 147 | pos += count * instruction_len 148 | 149 | l = read_line(f) 150 | 151 | return labels 152 | 153 | def assemble_instructions(f): 154 | # save the position at the start of the instructions so we can restore it after we process labels 155 | start_pos = f.tell() 156 | labels = process_labels(f) 157 | 158 | f.seek(start_pos) 159 | l = read_line(f).split('#')[0].strip() 160 | sio = StringIO() 161 | while l != 'end': 162 | count = 1 163 | if l[-1] == ':': # skip labels 164 | count = 0 165 | elif len(l.split()) > 2: 166 | if l.split()[1] == '*': 167 | count = parse_int(l.split()[0]) 168 | l = l.split(' ', 2)[-1] 169 | for i in range(count): 170 | if is_number(l[0]): 171 | sio.seek(parse_int(l.split()[0])) 172 | l = l.split(' ', 1)[1] 173 | line = l.split() 174 | sio.write(chr(opmap[line[0]])) 175 | if len(line) == 2: 176 | if line[1][0].isdigit(): # oparg is an immediate value 177 | oparg = parse_int(line[1]) 178 | else: 179 | oparg = labels[line[1]] 180 | 181 | sio.write(chr(oparg & 0xFF)) 182 | sio.write(chr((oparg >> 8) & 0xFF)) 183 | l = read_line(f).split('#')[0].strip() 184 | code = sio.getvalue() 185 | sio.close() 186 | return code 187 | 188 | def read_code(f): 189 | arg_count = 0 190 | n_locals = 0 191 | stack_size = 0 192 | flags = 0 193 | first_line_no = 0 194 | filename = '' 195 | name = '' 196 | lnotab = '' 197 | consts = () 198 | names = () 199 | varnames = () 200 | freevars = () 201 | cellvars = () 202 | code = '' 203 | l = read_line(f) 204 | while l != 'end': 205 | line = l.split() 206 | if line[0] == 'arg_count': 207 | arg_count = parse_int(line[1]) 208 | elif line[0] == 'n_locals': 209 | n_locals = parse_int(line[1]) 210 | elif line[0] == 'stack_size': 211 | stack_size = parse_int(line[1]) 212 | elif line[0] == 'flags': 213 | flags = parse_int(line[1]) 214 | elif line[0] == 'consts': 215 | consts = tuple(read_list(f, parse_int(line[1]))) 216 | elif line[0] == 'names': 217 | names = tuple(read_list(f, parse_int(line[1]))) 218 | elif line[0] == 'varnames': 219 | varnames = tuple(read_list(f, parse_int(line[1]))) 220 | elif line[0] == 'freevars': 221 | freevars = tuple(read_list(f, parse_int(line[1]))) 222 | elif line[0] == 'cellvars': 223 | cellvars = tuple(read_list(f, parse_int(line[1]))) 224 | elif line[0] == 'instructions': 225 | code = assemble_instructions(f) 226 | elif line[0] == 'filename': 227 | filename = l.split('"')[1] 228 | elif line[0] == 'name': 229 | name = intern(l.split('"')[1]) 230 | elif line[0] == 'lnotab': 231 | lnotab = l.split('"')[1].decode('string-escape') 232 | elif line[0] == 'first_line_no': 233 | first_line_no = parse_int(line[1]) 234 | l = read_line(f) 235 | return CodeType(arg_count, n_locals, stack_size, flags, code, consts, names, varnames, filename, name, first_line_no, lnotab, freevars, cellvars) 236 | 237 | --------------------------------------------------------------------------------