├── .gitignore ├── Art ├── AnimationBank.bin ├── Battle Animation │ └── PrimaryDataGroup.bin ├── MapBankandBattleBank.bin ├── MiscGraphicsBank.bin └── UncompressedSprites.bin ├── Map └── MapConstructionBank.bin ├── README ├── Text └── TextBank.bin ├── disassembler.py ├── ebasm-mra.txt ├── ebasm.py └── instructionset.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.smc 3 | *.pyc 4 | 65816info.txt 5 | -------------------------------------------------------------------------------- /Art/AnimationBank.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tripped/ebasm/ce71bc3fed2aab2d15284e86079bdb603bed2c31/Art/AnimationBank.bin -------------------------------------------------------------------------------- /Art/Battle Animation/PrimaryDataGroup.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tripped/ebasm/ce71bc3fed2aab2d15284e86079bdb603bed2c31/Art/Battle Animation/PrimaryDataGroup.bin -------------------------------------------------------------------------------- /Art/MapBankandBattleBank.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tripped/ebasm/ce71bc3fed2aab2d15284e86079bdb603bed2c31/Art/MapBankandBattleBank.bin -------------------------------------------------------------------------------- /Art/MiscGraphicsBank.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tripped/ebasm/ce71bc3fed2aab2d15284e86079bdb603bed2c31/Art/MiscGraphicsBank.bin -------------------------------------------------------------------------------- /Art/UncompressedSprites.bin: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tripped/ebasm/ce71bc3fed2aab2d15284e86079bdb603bed2c31/Art/UncompressedSprites.bin -------------------------------------------------------------------------------- /Map/MapConstructionBank.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tripped/ebasm/ce71bc3fed2aab2d15284e86079bdb603bed2c31/Map/MapConstructionBank.bin -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | THIS IS THE README FILE 2 | PLEASE READ IT 3 | -------------------------------------------------------------------------------- /Text/TextBank.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tripped/ebasm/ce71bc3fed2aab2d15284e86079bdb603bed2c31/Text/TextBank.bin -------------------------------------------------------------------------------- /disassembler.py: -------------------------------------------------------------------------------- 1 | # 2 | # Disassembler core 3 | # 4 | # Defines the functions for reading and expressing disassembly primitives, i.e. 5 | # opcodes and their operands. 6 | # 7 | 8 | import re 9 | 10 | 11 | from instructionset import * 12 | 13 | from collections import namedtuple 14 | 15 | #------------------------------------------------------------------------------ 16 | # Stuff for reading and formatting operands 17 | #------------------------------------------------------------------------------ 18 | 19 | Operand = namedtuple('Operand', ['value', 'len']) 20 | Operand.__len__ = lambda self: self.len 21 | Operand.__int__ = lambda self: self.value 22 | 23 | # 24 | # Functions for reading primitives from a stream. Each returns a tuple 25 | # containing the primitive value and its size in bytes. 
#
# Primitive readers. Each one pulls little-endian bytes from an iterator and
# wraps the decoded value together with its size in bytes in an Operand.
#
def byte(src):
    '''Reads a single byte from src.'''
    value = next(src)
    return Operand(value, 1)


def short(src):
    '''Reads a little-endian 16-bit value (two bytes) from src.'''
    low = next(src)
    high = next(src)
    return Operand(low | (high << 8), 2)


def long(src):
    '''Reads a little-endian 24-bit value (three bytes) from src.'''
    low = next(src)
    middle = next(src)
    high = next(src)
    return Operand(low | (middle << 8) | (high << 16), 3)


#
# Sign-extension helpers, used e.g. when turning the operand of a relative
# jump into a signed displacement.
#
def signedbyte(n):
    '''Sign-extends a byte to a signed integer.'''
    value = int(n)
    # Bit 7 set means the byte encodes a negative number.
    if value & 0x80:
        return value - 0x100
    return value


def signedshort(n):
    '''Sign-extends a short to a signed integer.'''
    value = int(n)
    # Bit 15 set means the short encodes a negative number.
    if value & 0x8000:
        return value - 0x10000
    return value
# The operand placeholders that may appear in an instruction description.
formats = {'const*', 'const+', 'short', 'byte', 'addr', 'near', 'nearx',
           'long', 'dp', 'sr', 'src,dst'}

# Consumers: (stream, status) -> Operand. Only the immediate forms depend on
# the CPU status: 'const*' follows the m flag, 'const+' follows the x flag.
operand_consumers = {
    'const*': lambda s, p: byte(s) if p.m else short(s),
    'const+': lambda s, p: byte(s) if p.x else short(s),
    'short': lambda s, p: short(s),
    'byte': lambda s, p: byte(s),
    'addr': lambda s, p: short(s),
    'near': lambda s, p: byte(s),
    'nearx': lambda s, p: short(s),
    'long': lambda s, p: long(s),
    'dp': lambda s, p: byte(s),
    'sr': lambda s, p: byte(s),
    'src,dst': lambda s, p: short(s),
}

# Stringifiers: (value, status) -> text. The bound str.format entries simply
# ignore the extra status argument.
operand_stringifiers = {
    'const*': lambda n, p: '${:0{w}X}'.format(n, w=2 if p.m else 4),
    'const+': lambda n, p: '${:0{w}X}'.format(n, w=2 if p.x else 4),
    'short': '${:04X}'.format,
    'byte': '${:02X}'.format,
    'addr': '${:04X}'.format,
    # Relative branch targets are measured from the address of the *next*
    # instruction and wrap inside the 64K program bank, hence the mask; without
    # it a backward branch near $0000 would be rendered as a negative number.
    'near': lambda n, p: '${:04X}'.format(
        (p.pc + 2 + signedbyte(n)) & 0xFFFF),
    'nearx': lambda n, p: '${:04X}'.format(
        (p.pc + 3 + signedshort(n)) & 0xFFFF),
    'long': '${:06X}'.format,
    'dp': '${:02X}'.format,
    'sr': '{}'.format,
    # Block moves are written "MVN src,dst" but encoded destination bank
    # first, so the first operand byte (low byte of n) is the destination and
    # the second (high byte) is the source.
    'src,dst': lambda n, p: '${:02X},${:02X}'.format(
        (n >> 8) & 0xFF, n & 0xFF),
}
def makereader(description):
    '''Builds the operand reader for one instruction description, e.g.

        f = makereader('LDX #const+')

    The result is called as f(src, status) and pulls a correctly-sized
    operand from the byte stream src. For the example above,
    f(src, Status(x=1,...)) consumes a single byte, because the width of
    the immediate follows the CPU's x flag.

    Descriptions that contain no operand placeholder yield a reader that
    consumes nothing and returns None.'''

    found = pattern.search(description)
    if found is None:
        return lambda *args: None
    return operand_consumers[found.group(0)]


def makestringifier(description):
    '''Builds the stringifier for one instruction description, e.g.

        f = makestringifier('LDX #const+')

    The result is called as f(operand_value, status) and returns the
    assembly language text of the instruction; for the example above,
    f(32, Status(x=1,...)) returns "LDX #$20".

    Descriptions that contain no operand placeholder yield a function that
    returns the description unchanged.'''

    found = pattern.search(description)
    if found is None:
        return lambda *args: description

    render_operand = operand_stringifiers[found.group(0)]
    # Swap the placeholder for a format field that receives the rendered
    # operand text.
    template = pattern.sub('{}', description)

    def stringify(n, s):
        return template.format(render_operand(n, s))

    return stringify
class Instruction(object):
    '''One decoded 65816 machine instruction: the opcode byte, its operand
    (value and size in bytes), and the CPU status in effect at the point in
    the program where the instruction was read.'''

    def __init__(self, op, operand, status):
        self.op = op
        self.status = status
        # A missing (or falsy) operand is stored as a zero-length zero.
        if operand:
            self.operand, self.operand_len = operand[0], operand[1]
        else:
            self.operand, self.operand_len = 0, 0

    def __len__(self):
        # One opcode byte plus however many operand bytes were read.
        return self.operand_len + 1

    def __iter__(self):
        '''Yields the raw bytes of the instruction: the opcode, then the
        operand bytes from least to most significant.'''
        yield self.op
        for shift in range(0, 8 * self.operand_len, 8):
            yield (self.operand >> shift) & 0xFF

    def __str__(self):
        '''Renders the instruction as an assembly language string.'''
        render = stringifiers[self.op]
        return render(self.operand, self.status)
def instruction(src, status):
    '''Reads a single instruction from a stream and returns a pair (inst, stat)
    where inst is an object representing the instruction, and stat is the
    'successor' status of the instruction, i.e. the state of the CPU after
    executing the instruction.

    If the instruction read was a branch instruction other than BRA or BRL,
    stat is a tuple (taken, fallthrough) holding the successor status for
    both possible paths of the branch.

    @param src     An iterable byte stream from which to read the instruction
    @param status  An object recording state of the 65816 at the place in
                   the program where the instruction occurs; must include
                   pbr, pc, m and x. pbr/pc are the address of the
                   instruction itself.'''

    # Get the opcode and its operand
    op = next(src)
    operand = readers[op](src, status)
    inst = Instruction(op, operand, status)

    # Determine new values of m and x flags
    m = status.m
    x = status.x

    # SEP and REP modify status bits directly
    if op == 0xE2:      # SEP
        if int(operand) & 0x20: m = 1
        if int(operand) & 0x10: x = 1
    elif op == 0xC2:    # REP
        if int(operand) & 0x20: m = 0
        if int(operand) & 0x10: x = 0
    # Subroutine jumps are assumed to reset status bits. Examining the callee
    # would be more accurate, but cannot be decided in general.
    elif op in {0x22, 0x20, 0xFC}:
        m = 0
        x = 0

    # TODO: Also compute c and e bits, since the e (emulation) bit forces all
    # registers to 8 bits as well:
    #   CLC -> c = 0, SEC -> c = 1, any op that changes c -> c = Unknown,
    #   XCE -> c, e = e, c

    # Address of the instruction that physically follows this one. The
    # program counter wraps inside its 64K bank, hence the mask. len(inst)
    # is the opcode byte plus the operand bytes (zero for implied operands).
    fallthrough = (status.pc + len(inst)) & 0xFFFF

    # Compute the successor address
    pbr = status.pbr
    if op == 0x4C:      # JMP absolute
        pc = int(operand)
    elif op == 0x5C:    # JML absolute long
        # Split the 24-bit target inline; the bank()/offset() helpers are
        # defined by the ebasm script and are not in scope in this module.
        target = int(operand)
        pbr = (target >> 16) & 0xFF
        pc = target & 0xFFFF
    elif op == 0x80:    # BRA near
        # Relative displacements are measured from the next instruction.
        pc = (fallthrough + signedbyte(operand)) & 0xFFFF
    elif op == 0x82:    # BRL near extended
        pc = (fallthrough + signedshort(operand)) & 0xFFFF
    else:
        pc = fallthrough

    successor = Status(pbr, pc, m, x)

    # For conditional branches we produce two successors, one for each path.
    if isbranch(op):
        taken = Status(pbr = status.pbr, m = m, x = x,
                pc = (fallthrough + signedbyte(operand)) & 0xFFFF)
        successor = (taken, successor)

    return (inst, successor)
attempted to recurse into non-subroutine") 53 | return entities[address] 54 | 55 | print('Found new subroutine at ${:06X}'.format(address)) 56 | 57 | src = iterfrom(container, fileoffset(address)) 58 | status = Status(bank(address), offset(address), status.m, status.x) 59 | 60 | # Register a subroutine in the entities table first, just in case we get 61 | # here again through recursion 62 | sub = Subroutine(address) 63 | entities[address] = sub 64 | 65 | inst = None 66 | while not inst or inst.op != 0x60 and inst.op != 0x6B: 67 | inst,status = instruction(src, status) 68 | sub.append(inst) 69 | 70 | if inst.op == 0x22: 71 | print(disassembly(inst)) 72 | recursive_subroutine(container, inst.operand, status, entities) 73 | elif inst.op == 0x20: 74 | print(disassembly(inst)) 75 | subadr = makeadr(inst.status.pbr, inst.operand) 76 | recursive_subroutine(container, subadr, status, entities) 77 | 78 | #sub.exitstatus = status 79 | # ohsnap! so, we set exit status here at the end... but the exitstatus of 80 | # the subroutine object is None until this point, and we _might_ get back to 81 | # this subroutine through mutual recursion.... in which case whatever gets 82 | # there will see a 'None' status! 83 | # 84 | # Um, quickfix for this is to offer the entry status as a best guess for the 85 | # exit status? That doesn't make much sense though. 86 | # 87 | # Better solution for now: don't use the exit status from subroutines. 88 | # 89 | # Additionally, we need to start being careful about status.pbr and status.pc; 90 | # the ending status of this function will be the successor of an RTS or RTL, 91 | # which of course isn't known statically for pbr and pc. Those fields should 92 | # be marked with an "Unknown" value, preferably. That way if we screw up and 93 | # let Unknown pbr and pc values propagate to other disassemblies, it will be 94 | # obvious what has happened, as opposed to merely seeing wrong program counter 95 | # values. 
def recursive_disassemble(container, address, status):
    '''Disassembles a segment of code, recursively following subroutine calls.
    The 'top' level of code is only followed until the first untraceable
    jump, i.e., any indirect jump or RTS/RTL, or until a direct jump leads
    back to an address that has already been disassembled.

    Conditional branches are not followed; disassembly continues with the
    instruction after the branch.

    @param container  Indexable byte container holding the program image
    @param address    Address at which to start reading
    @param status     Status of the CPU at that address
    @return           A pair (entities, status): entities maps addresses to
                      the Instruction or Subroutine found there, and status
                      is the successor status of the last instruction read.'''

    src = iterfrom(container, fileoffset(address))
    entities = dict()
    while True:
        inst, status = instruction(src, status)

        # For conditional branches instruction() returns a pair of statuses
        # (taken, fall-through). We keep reading the stream linearly, so the
        # fall-through status is the one that describes the next instruction.
        # (Status is itself a tuple type, so test for it explicitly.)
        if not isinstance(status, Status):
            status = status[1]

        entities[makeadr(inst.status.pbr, inst.status.pc)] = inst

        # JSL long
        if inst.op == 0x22:
            recursive_subroutine(container, inst.operand, status, entities)
        # JSR short
        elif inst.op == 0x20:
            subadr = makeadr(inst.status.pbr, inst.operand)
            recursive_subroutine(container, subadr, status, entities)
        # Untraceable indirect jumps
        elif inst.op in { 0x6C, 0x7C, 0xDC, 0xFC }:
            print("Warning: untraceable jump")
            break
        # Subroutine return
        elif inst.op in { 0x60, 0x6B }:
            break

        # Traceable jumps
        elif inst.op in { 0x4C, 0x5C }:
            if inst.op == 0x4C:
                newadr = makeadr(inst.status.pbr, inst.operand)
            else:
                newadr = inst.operand

            # A jump back into code we have already visited (e.g. a loop
            # closed with JMP) would otherwise be re-read forever.
            if makeadr(bank(newadr), offset(newadr)) in entities:
                break

            src = iterfrom(container, fileoffset(newadr))
            status = Status(bank(newadr), offset(newadr), status.m, status.x)

    return entities, status
# TODO: this should be a parameter
goodbanks = { 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xEE, 0xEF }

def testwindow(container, start, flags=0):
    '''Heuristically scores whether the bytes at file offset start look like
    code. Instructions are read sequentially up to and including the first
    RTS/RTL; each one is printed together with the reason (if any) it
    changed the score, followed by the two accumulated confidence totals.

    @param container  Indexable byte container holding the program image
    @param start      File offset at which to start reading
    @param flags      Initial value used for both the m and x flags'''

    is_code = 0
    not_code = 0

    src = iterfrom(container, start)
    address = start + 0xC00000
    status = Status((address & 0xFF0000) >> 16, address & 0xFFFF, flags, flags)

    instructions = []
    inst = None
    while inst is None or inst.op not in {0x6B, 0x60}:
        inst,status = instruction(src, status)

        # Conditional branches yield a (taken, fall-through) pair of
        # statuses; the stream is read linearly, so continue with the
        # fall-through one. (Status is itself a tuple type, so test for it
        # explicitly.)
        if not isinstance(status, Status):
            status = status[1]

        explanation = ''

        # Check for 'bad' codes
        if inst.op == 0x00:
            not_code += 50      # BRK
            explanation = 'BRK encountered'

        if inst.op == 0x02:
            not_code += 10      # COP
            explanation = 'COP encountered'

        if inst.op in {0x22, 0x5C} and bank(inst.operand) not in goodbanks:
            not_code += 100
            explanation = 'JMP/JSL to non-code bank'

        if inst.op in {0x1C, 0x14, 0x0C, 0x04}:
            not_code += 20
            explanation = 'TRB/TSB encountered'


        # Check for 'good' codes
        # A small value in a 16-bit immediate. The m flag consulted is the
        # one the operand was decoded under; instructions with an immediate
        # operand never change it.
        if 'const' in instruction_set[inst.op] and \
           not inst.status.m and \
           inst.operand < 256:
            is_code += 20
            explanation = 'Likely immediate operand'

        if inst.op in {0x22, 0x5C} and bank(inst.operand) in goodbanks:
            is_code += 10
            explanation = 'JMP/JSL to known code bank'

        # The window may begin directly on a return, in which case there is
        # no previous instruction to inspect.
        if inst.op in {0x6b, 0x60} and instructions and \
           instructions[-1].op == 0x2B:
            # PLD followed by RTS or RTL
            is_code += 20
            explanation = 'Return from subroutine sequence'

        print('{:50}{}'.format(disassembly(inst), explanation))
        instructions.append(inst)

    print('testwindow analysis for ${:06X}'.format(address))
    print('is_code confidence:  {}'.format(is_code))
    print('not_code confidence: {}'.format(not_code))
def loadfile(filename):
    '''Reads the whole file into an array of unsigned bytes.

    @param filename  Path of the file to load
    @return          array('B') holding the file's contents'''
    # The context manager guarantees the handle is closed even if the read
    # fails.
    with open(filename, mode='rb') as f:
        return array('B', f.read())


def iterfrom(container, offset):
    '''Generator yielding container[offset], container[offset + 1], ... and
    finishing normally once the index runs past the end of the container.

    @param container  Any object supporting integer indexing
    @param offset     Index of the first element to yield'''
    while True:
        try:
            value = container[offset]
        except IndexError:
            # Finish with a plain return: raising StopIteration inside a
            # generator body is converted to RuntimeError (PEP 479).
            return
        offset += 1
        yield value
isinstance(v, Instruction)] 329 | instructions = [inst for (adr,inst) in sorted(instructions, key=lambda i: i[0])] 330 | 331 | print('Main segment:') 332 | for inst in instructions: 333 | print(disassembly(inst)) 334 | 335 | # Sort subroutines by address 336 | subroutines = [v for k,v in entities.items() if isinstance(v, Subroutine)] 337 | subroutines = sorted(subroutines, key = lambda s: s.address) 338 | 339 | print('Identified {} subroutines:'.format(len(subroutines))) 340 | for i,s in enumerate(subroutines): 341 | print('Subroutine {} (${:06X}):'.format(i, s.address)) 342 | for inst in s.instructions: 343 | print(disassembly(inst)) 344 | print('') 345 | 346 | 347 | 348 | #src = iterfrom(rom, address) 349 | #disassemble(src, address + 0xC00000) 350 | 351 | 352 | -------------------------------------------------------------------------------- /instructionset.py: -------------------------------------------------------------------------------- 1 | 2 | #------------------------------------------------------------------------------ 3 | # The definition of the 65816 instruction set. 
4 | #------------------------------------------------------------------------------ 5 | 6 | instruction_set = { 7 | # ADC 8 | 0x69 : 'ADC #const*', # '*' = actual width of const determined by m bit 9 | 0x6D : 'ADC addr', 10 | 0x6F : 'ADC long', 11 | 0x65 : 'ADC dp', 12 | 0x72 : 'ADC (dp)', 13 | 0x67 : 'ADC [dp]', 14 | 0x7D : 'ADC addr,X', 15 | 0x7F : 'ADC long,X', 16 | 0x79 : 'ADC addr,Y', 17 | 0x75 : 'ADC dp,X', 18 | 0x61 : 'ADC (dp,X)', 19 | 0x71 : 'ADC (dp),Y', 20 | 0x77 : 'ADC [dp],Y', 21 | 0x63 : 'ADC sr,S', 22 | 0x73 : 'ADC (sr,S),Y', 23 | 24 | # AND 25 | 0x29 : 'AND #const*', 26 | 0x2D : 'AND addr', 27 | 0x2F : 'AND long', 28 | 0x25 : 'AND dp', 29 | 0x32 : 'AND (dp)', 30 | 0x27 : 'AND [dp]', 31 | 0x3D : 'AND addr,X', 32 | 0x3F : 'AND long,X', 33 | 0x39 : 'AND addr,Y', 34 | 0x35 : 'AND dp,X', 35 | 0x21 : 'AND (dp,X)', 36 | 0x31 : 'AND (dp),Y', 37 | 0x37 : 'AND [dp],Y', 38 | 0x23 : 'AND sr,S', 39 | 0x33 : 'AND (sr,S),Y', 40 | 41 | # ASL 42 | 0x0A : 'ASL', 43 | 0x0E : 'ASL addr', 44 | 0x06 : 'ASL dp', 45 | 0x1E : 'ASL addr,X', 46 | 0x16 : 'ASL dp,X', 47 | 48 | # Branches 49 | 0x90 : 'BCC near', # PC relative short (1 byte) 50 | 0xB0 : 'BCS near', 51 | 0xF0 : 'BEQ near', 52 | 0xD0 : 'BNE near', 53 | 0x30 : 'BMI near', 54 | 0x10 : 'BPL near', 55 | 0x50 : 'BVC near', 56 | 0x70 : 'BVS near', 57 | 0x80 : 'BRA near', 58 | 0x82 : 'BRL nearx', # PC relative long (2 bytes) 59 | 60 | # BIT 61 | 0x89 : 'BIT #const*', 62 | 0x2C : 'BIT addr', 63 | 0x24 : 'BIT dp', 64 | 0x3C : 'BIT addr,X', 65 | 0x34 : 'BIT dp,X', 66 | 67 | # BRK 68 | 0x00 : 'BRK byte', 69 | 70 | # Status register manipulation 71 | 0x18 : 'CLC', 72 | 0xD8 : 'CLD', 73 | 0x58 : 'CLI', 74 | 0xB8 : 'CLV', 75 | 0x38 : 'SEC', 76 | 0xF8 : 'SED', 77 | 0x78 : 'SEI', 78 | 79 | # CMP 80 | 0xC9 : 'CMP #const*', 81 | 0xCD : 'CMP addr', 82 | 0xCF : 'CMP long', 83 | 0xC5 : 'CMP dp', 84 | 0xD2 : 'CMP (dp)', 85 | 0xC7 : 'CMP [dp]', 86 | 0xDD : 'CMP addr,X', 87 | 0xDF : 'CMP long,X', 88 | 0xD9 : 'CMP addr,Y', 89 | 0xD5 : 'CMP 
dp,X', 90 | 0xC1 : 'CMP (dp,X)', 91 | 0xD1 : 'CMP (dp),Y', 92 | 0xD7 : 'CMP [dp],Y', 93 | 0xC3 : 'CMP sr,S', 94 | 0xD3 : 'CMP (sr,S),Y', 95 | 96 | # COP 97 | 0x02 : 'COP byte', 98 | 99 | # CPX 100 | 0xE0 : 'CPX #const+', 101 | 0xEC : 'CPX addr', 102 | 0xE4 : 'CPX dp', 103 | 104 | # CPY 105 | 0xC0 : 'CPY #const+', 106 | 0xCC : 'CPY addr', 107 | 0xC4 : 'CPY dp', 108 | 109 | # DEC / DEX / DEY 110 | 0x3A : 'DEA', 111 | 0xCE : 'DEC addr', 112 | 0xC6 : 'DEC dp', 113 | 0xDE : 'DEC addr,X', 114 | 0xD6 : 'DEC dp,X', 115 | 0xCA : 'DEX', 116 | 0x88 : 'DEY', 117 | 118 | # EOR 119 | 0x49 : 'EOR #const*', 120 | 0x4D : 'EOR addr', 121 | 0x4F : 'EOR long', 122 | 0x45 : 'EOR dp', 123 | 0x52 : 'EOR (dp)', 124 | 0x47 : 'EOR [dp]', 125 | 0x5D : 'EOR addr,X', 126 | 0x5F : 'EOR long,X', 127 | 0x59 : 'EOR addr,Y', 128 | 0x55 : 'EOR dp,X', 129 | 0x41 : 'EOR (dp,X)', 130 | 0x51 : 'EOR (dp),Y', 131 | 0x57 : 'EOR [dp],Y', 132 | 0x43 : 'EOR sr,S', 133 | 0x53 : 'EOR (sr,S),Y', 134 | 135 | # INC, INX, INY 136 | 0x1A : 'INA', 137 | 0xEE : 'INC addr', 138 | 0xE6 : 'INC dp', 139 | 0xFE : 'INC addr,X', 140 | 0xF6 : 'INC dp,X', 141 | 0xE8 : 'INX', 142 | 0xC8 : 'INY', 143 | 144 | # JMP / JML 145 | 0x4C : 'JMP addr', 146 | 0x6C : 'JMP (addr)', 147 | 0x7C : 'JMP (addr,X)', 148 | 0x5C : 'JML long', 149 | 0xDC : 'JML [addr]', 150 | 151 | # JSR / JSL 152 | 0x22 : 'JSL long', 153 | 0x20 : 'JSR addr', 154 | 0xFC : 'JSR (addr,X)', 155 | 156 | # LDA 157 | 0xA9 : 'LDA #const*', 158 | 0xAD : 'LDA addr', 159 | 0xAF : 'LDA long', 160 | 0xA5 : 'LDA dp', 161 | 0xB2 : 'LDA (dp)', 162 | 0xA7 : 'LDA [dp]', 163 | 0xBD : 'LDA addr,X', 164 | 0xBF : 'LDA long,X', 165 | 0xB9 : 'LDA addr,Y', 166 | 0xB5 : 'LDA dp,X', 167 | 0xA1 : 'LDA (dp,X)', 168 | 0xB1 : 'LDA (dp),Y', 169 | 0xB7 : 'LDA [dp],Y', 170 | 0xA3 : 'LDA sr,S', 171 | 0xB3 : 'LDA (sr,S),Y', 172 | 173 | # LDX 174 | 0xA2 : 'LDX #const+', # '+' = width depends on x status bit 175 | 0xAE : 'LDX addr', 176 | 0xA6 : 'LDX dp', 177 | 0xBE : 'LDX addr,Y', 178 | 0xB6 : 'LDX 
dp,Y', 179 | 180 | # LDY 181 | 0xA0 : 'LDY #const+', 182 | 0xAC : 'LDY addr', 183 | 0xA4 : 'LDY dp', 184 | 0xBC : 'LDY addr,X', 185 | 0xB4 : 'LDY dp,X', 186 | 187 | # LSR 188 | 0x4A : 'LSR', 189 | 0x4E : 'LSR addr', 190 | 0x46 : 'LSR dp', 191 | 0x5E : 'LSR addr,X', 192 | 0x56 : 'LSR dp,X', 193 | 194 | # MVN/MVP 195 | 0x54 : 'MVN src,dst', 196 | 0x44 : 'MVP src,dst', 197 | 198 | # NOP 199 | 0xEA : 'NOP', 200 | 201 | # ORA 202 | 0x09 : 'ORA #const*', 203 | 0x0D : 'ORA addr', 204 | 0x0F : 'ORA long', 205 | 0x05 : 'ORA dp', 206 | 0x12 : 'ORA (dp)', 207 | 0x07 : 'ORA [dp]', 208 | 0x1D : 'ORA addr,X', 209 | 0x1F : 'ORA long,X', 210 | 0x19 : 'ORA addr,Y', 211 | 0x15 : 'ORA dp,X', 212 | 0x01 : 'ORA (dp,X)', 213 | 0x11 : 'ORA (dp),Y', 214 | 0x17 : 'ORA [dp],Y', 215 | 0x03 : 'ORA sr,S', 216 | 0x13 : 'ORA (sr,S),Y', 217 | 218 | # PEA / PEI / PER 219 | 0xF4 : 'PEA addr', 220 | 0xD4 : 'PEI (dp)', 221 | 0x62 : 'PER short', # label? see 65816info.txt:1742 222 | 223 | # Push / pull registers 224 | 0x48 : 'PHA', 225 | 0x08 : 'PHP', 226 | 0xDA : 'PHX', 227 | 0x5A : 'PHY', 228 | 0x68 : 'PLA', 229 | 0x28 : 'PLP', 230 | 0xFA : 'PLX', 231 | 0x7A : 'PLY', 232 | 0x8B : 'PHB', 233 | 0x0B : 'PHD', 234 | 0x4B : 'PHK', 235 | 0xAB : 'PLB', 236 | 0x2B : 'PLD', 237 | 238 | # REP 239 | 0xC2 : 'REP #byte', 240 | 241 | # ROL 242 | 0x2A : 'ROL', 243 | 0x2E : 'ROL addr', 244 | 0x26 : 'ROL dp', 245 | 0x3E : 'ROL addr,X', 246 | 0x36 : 'ROL dp,X', 247 | 248 | # ROR 249 | 0x6A : 'ROR', 250 | 0x6E : 'ROR addr', 251 | 0x66 : 'ROR dp', 252 | 0x7E : 'ROR addr,X', 253 | 0x76 : 'ROR dp,X', 254 | 255 | # RTI / RTL / RTS 256 | 0x40 : 'RTI', 257 | 0x6B : 'RTL', 258 | 0x60 : 'RTS', 259 | 260 | # SBC 261 | 0xE9 : 'SBC #const*', 262 | 0xED : 'SBC addr', 263 | 0xEF : 'SBC long', 264 | 0xE5 : 'SBC dp', 265 | 0xF2 : 'SBC (dp)', 266 | 0xE7 : 'SBC [dp]', 267 | 0xFD : 'SBC addr,X', 268 | 0xFF : 'SBC long,X', 269 | 0xF9 : 'SBC addr,Y', 270 | 0xF5 : 'SBC dp,X', 271 | 0xE1 : 'SBC (dp,X)', 272 | 0xF1 : 'SBC (dp),Y', 273 | 
0xF7 : 'SBC [dp],Y', 274 | 0xE3 : 'SBC sr,S', 275 | 0xF3 : 'SBC (sr,S),Y', 276 | 277 | # SEP 278 | 0xE2 : 'SEP #byte', 279 | 280 | # STA 281 | 0x8D : 'STA addr', 282 | 0x8F : 'STA long', 283 | 0x85 : 'STA dp', 284 | 0x92 : 'STA (dp)', 285 | 0x87 : 'STA [dp]', 286 | 0x9D : 'STA addr,X', 287 | 0x9F : 'STA long,X', 288 | 0x99 : 'STA addr,Y', 289 | 0x95 : 'STA dp,X', 290 | 0x81 : 'STA (dp,X)', 291 | 0x91 : 'STA (dp),Y', 292 | 0x97 : 'STA [dp],Y', 293 | 0x83 : 'STA sr,S', 294 | 0x93 : 'STA (sr,S),Y', 295 | 296 | # STP 297 | 0xDB : 'STP', 298 | 299 | # STX / STY 300 | 0x8E : 'STX addr', 301 | 0x86 : 'STX dp', 302 | 0x96 : 'STX dp,Y', 303 | 0x8C : 'STY addr', 304 | 0x84 : 'STY dp', 305 | 0x94 : 'STY dp,X', 306 | 307 | # STZ 308 | 0x9C : 'STZ addr', 309 | 0x64 : 'STZ dp', 310 | 0x9E : 'STZ addr,X', 311 | 0x74 : 'STZ dp,X', 312 | 313 | # Register transfers 314 | 0xAA : 'TAX', 315 | 0xA8 : 'TAY', 316 | 0x8A : 'TXA', 317 | 0x98 : 'TYA', 318 | 0xBA : 'TSX', 319 | 0x9A : 'TXS', 320 | 0x9B : 'TXY', 321 | 0xBB : 'TYX', 322 | 323 | # Direct page manipulation 324 | 0x5B : 'TCD', 325 | 0x7B : 'TDC', 326 | 327 | # Stack pointer manipulation 328 | 0x1B : 'TCS', 329 | 0x3B : 'TSC', 330 | 331 | # Test and set/reset memory bits 332 | 0x1C : 'TRB addr', 333 | 0x14 : 'TRB dp', 334 | 0x0C : 'TSB addr', 335 | 0x04 : 'TSB dp', 336 | 337 | # Wait for interrupt 338 | 0xCB : 'WAI', 339 | 340 | # So long and thanks for all the fish 341 | 0x42 : 'WDM', 342 | 343 | # XBA / XCE 344 | 0xEB : 'XBA', 345 | 0xFB : 'XCE', 346 | } 347 | 348 | assert(len(instruction_set) == 256) 349 | 350 | #------------------------------------------------------------------------------ 351 | # Some useful predicates over the instruction set 352 | #------------------------------------------------------------------------------ 353 | 354 | def isbranch(op): 355 | '''Returns True iff op is a branching instruction OTHER THAN BRL or BRA.''' 356 | return op in { 0x90, 0xB0, 0xF0, 0xD0, 0x30, 0x10, 0x50, 0x70 } 357 | 358 | 359 | 
# Opcode groups consulted by the predicates below.
_RETURN_OPS = frozenset((0x60, 0x6B))
_ABSOLUTE_JUMP_OPS = frozenset((0x4C, 0x5C))
_INDIRECT_JUMP_OPS = frozenset((0x6C, 0x7C, 0xDC, 0xFC))


def isreturn(op):
    '''Tells whether op is a subroutine return (RTL or RTS).'''
    return op in _RETURN_OPS


def isjmp(op):
    '''Tells whether op is an absolute jump.'''
    return op in _ABSOLUTE_JUMP_OPS


def isindirectjmp(op):
    '''Tells whether op is an indirect (untraceable) jump.'''
    return op in _INDIRECT_JUMP_OPS