mirror of https://github.com/zeldaret/mm.git
980 lines
43 KiB
Python
Executable File
980 lines
43 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
import argparse, os, struct, ast, bisect
|
|
|
|
SPLIT_FILES = True # TODO this should be a flag somewhere
|
|
|
|
loadHighRefs = {}
|
|
loadLowRefs = {}
|
|
|
|
known_files = {}
|
|
known_funcs = dict()
|
|
known_objects = dict()
|
|
known_vars = dict()
|
|
|
|
regs = {
|
|
0:"$zero", 1:"$at", 2:"$v0", 3:"$v1", 4:"$a0", 5:"$a1", 6:"$a2", 7:"$a3",
|
|
8:"$t0", 9:"$t1", 10:"$t2", 11:"$t3", 12:"$t4", 13:"$t5", 14:"$t6", 15:"$t7",
|
|
16:"$s0", 17:"$s1", 18:"$s2", 19:"$s3", 20:"$s4", 21:"$s5", 22:"$s6", 23:"$s7",
|
|
24:"$t8", 25:"$t9", 26:"$k0", 27:"$k1", 28:"$gp", 29:"$sp", 30:"$fp", 31:"$ra",
|
|
}
|
|
|
|
ops = {
|
|
2:"j", 3:"jal", 4:"beq", 5:"bne", 6:"blez", 7:"bgtz",
|
|
8:"addi", 9:"addiu", 10:"slti", 11:"sltiu", 12:"andi", 13:"ori", 14:"xori", 15:"lui",
|
|
20:"beql", 21:"bnel", 22:"blezl", 23:"bgtzl",
|
|
24:"daddi", 25:"daddiu",
|
|
32:"lb", 33:"lh", 34:"lwl", 35:"lw", 36:"lbu", 37:"lhu", 38:"lwr",
|
|
40:"sb", 41:"sh", 42:"swl", 43:"sw", 46:"swr", 47:"cache",
|
|
48:"ll", 49:"lwc1", 50:"lwc2", 51:"pref", 53:"ldc1", 54:"ldc2", 55:"ld",
|
|
56:"sc", 57:"swc1", 58:"swc2", 61:"sdc1", 62:"sdc2", 63:"sd",
|
|
}
|
|
|
|
funcs = {
|
|
0:"sll", 2:"srl", 3:"sra", 4:"sllv", 6:"srlv", 7:"srav",
|
|
8:"jr", 9:"jalr", 10:"movz", 11:"movn", 12:"syscall", 13:"break", 15:"sync",
|
|
16:"mfhi", 17:"mthi", 18:"mflo", 19:"mtlo", 20:"dsllv", 22:"dsrlv", 23:"dsrav",
|
|
24:"mult", 25:"multu", 26:"div", 27:"divu", 28:"dmult", 29:"dmultu", 30:"ddiv", 31:"ddivu",
|
|
32:"add", 33:"addu", 34:"sub", 35:"subu", 36:"and", 37:"or", 38:"xor", 39:"nor",
|
|
42:"slt", 43:"sltu", 44:"dadd", 45:"daddu",
|
|
48:"tge", 49:"tgeu", 50:"tlt", 51:"tltu", 52:"teq", 54:"tne",
|
|
56:"dsll", 59:"dsra", 60:"dsll32", 63:"dsra32",
|
|
}
|
|
|
|
branch1reg = {
|
|
0:"bltz", 1:"bgez", 2:"bltzl", 3:"bgezl",
|
|
8:"tgei", 9:"tgeiu", 10:"tlti", 11:"tltiu", 12:"tegi", 14:"tnei",
|
|
16:"bltzal", 17:"bgezal", 18:"bltall", 19:"bgczall",
|
|
}
|
|
|
|
floats = {
|
|
0:"add", 1:"sub", 2:"mul", 3:"div", 4:"sqrt", 5:"abs", 6:"mov", 7:"neg",
|
|
8:"round.l", 9:"trunc.l", 10:"ceil.l", 11:"floor.l", 12:"round.w", 13:"trunc.w", 14:"ceil.w", 15:"floor.w",
|
|
18:"movz", 19:"movn",
|
|
32:"cvt.s", 33:"cvt.d", 36:"cvt.w", 37:"cvt.l",
|
|
48:"c.f", 49:"c.un", 50:"c.eq", 51:"c.ueq", 52:"c.olt", 53:"c.ult", 54:"c.ole", 55:"c.ule",
|
|
56:"c.sf", 57:"c.ngle", 58:"c.seq", 59:"c.ngl", 60:"c.lt", 61:"c.nge", 62:"c.le", 63:"c.ngt",
|
|
}
|
|
|
|
def read_file(name):
|
|
file_data=[]
|
|
|
|
try:
|
|
with open(name, 'rb') as f:
|
|
file_data = f.read()
|
|
except IOError:
|
|
print('failed to read file ' + name)
|
|
return file_data
|
|
|
|
|
|
def float_reg(num):
|
|
if num == 31:
|
|
return "$31"
|
|
return "$f%d" % num
|
|
|
|
|
|
def format_ref(name, offset):
|
|
if offset == 0:
|
|
return "{}".format(name)
|
|
else:
|
|
return "{} + 0x{:X}".format(name, offset)
|
|
|
|
def get_op(inst):
|
|
return (inst & 0b11111100000000000000000000000000) >> 26
|
|
|
|
|
|
def get_func(inst):
|
|
return (inst & 0b00000000000000000000000000111111)
|
|
|
|
|
|
def get_rs(inst):
|
|
return (inst & 0b00000011111000000000000000000000) >> 21
|
|
|
|
|
|
def get_rt(inst):
|
|
return (inst & 0b00000000000111110000000000000000) >> 16
|
|
|
|
|
|
def get_rd(inst):
|
|
return (inst & 0b00000000000000001111100000000000) >> 11
|
|
|
|
|
|
def get_shift(inst):
|
|
return (inst & 0b00000000000000000000011111000000) >> 6
|
|
|
|
|
|
def get_ft(inst):
|
|
return (inst & 0b00000000000111110000000000000000) >> 16
|
|
|
|
|
|
def get_fs(inst):
|
|
return (inst & 0b00000000000000001111100000000000) >> 11
|
|
|
|
|
|
def get_fd(inst):
|
|
return (inst & 0b00000000000000000000011111000000) >> 6
|
|
|
|
|
|
def get_imm(inst):
|
|
return inst & 0b00000000000000001111111111111111
|
|
|
|
|
|
def get_signed_imm(inst):
|
|
imm = get_imm(inst)
|
|
if (imm & (1 << 15)) != 0:
|
|
imm = -2**15 + (imm & 0b00000000000000000111111111111111)
|
|
return imm
|
|
|
|
|
|
def is_load(inst):
|
|
return get_op(inst) > 31
|
|
|
|
|
|
def get_func_name(addr):
|
|
if addr in known_funcs:
|
|
return known_funcs[addr][0]
|
|
else:
|
|
return "func_%08X" % addr
|
|
|
|
|
|
def get_symbol_name(addr):
|
|
if addr in known_vars and known_vars[addr][0] != "":
|
|
return known_vars[addr][0]
|
|
else:
|
|
return "D_%08X" % addr
|
|
|
|
|
|
def write_header(file, is_data):
|
|
file.write(".set noat # allow use of $at\n"
|
|
".set noreorder # don't insert nops after branches\n"
|
|
".set gp=64 # allow use of 64bit registers\n"
|
|
".macro glabel label\n"
|
|
" .global \label\n"
|
|
" \label:\n"
|
|
".endm\n"
|
|
"\n")
|
|
|
|
if is_data:
|
|
if "_rodata" in file.name:
|
|
file.write(".section .rodata\n\n")
|
|
else:
|
|
file.write(".section .data\n\n")
|
|
|
|
|
|
# TODO add code_regions?
|
|
class Disassembler:
|
|
|
|
class File:
|
|
def __init__(self, name, disasmPath, data, vaddr):
|
|
self.name = name
|
|
self.disasmPath = disasmPath
|
|
self.data = data
|
|
self.vaddr = vaddr
|
|
self.size = len(data)
|
|
|
|
def get_inst(self, num):
|
|
offset = num*4
|
|
return struct.unpack('>I', self.data[offset:offset+4])[0]
|
|
|
|
def __init__(self):
|
|
self.files = list()
|
|
self.objects = set()
|
|
self.functions = set()
|
|
self.labels = set()
|
|
self.vars = set()
|
|
self.switch_cases = set()
|
|
self.vars_sorted = list()
|
|
self.vars_length = dict()
|
|
self.data_regions = list()
|
|
self.bss_regions = list()
|
|
|
|
self.has_done_first_pass = False
|
|
|
|
self.auto_analysis = False
|
|
|
|
self.is_data_cache = {}
|
|
self.is_code_cache = {}
|
|
self.is_bss_cache = {}
|
|
|
|
def load_defaults(self):
|
|
for file in known_files:
|
|
self.add_file(file[0], file[1], file[2], file[3])
|
|
self.add_object(file[3]) # assume every file starts with a object and function
|
|
self.add_function(file[3])
|
|
for region in file[4]:
|
|
self.add_data_region(region[0], region[1], file[2])
|
|
for region in file[5]:
|
|
self.add_bss_region(region[0], region[1], file[2])
|
|
|
|
for addr in known_funcs:
|
|
self.add_function(addr)
|
|
|
|
for addr in known_objects:
|
|
self.add_object(addr)
|
|
if self.is_in_code(addr):
|
|
self.add_function(addr) # assume every object starts with a function
|
|
|
|
for addr in known_vars:
|
|
self.add_variable(addr, known_vars[addr][3])
|
|
|
|
def reset_cache(self):
|
|
self.is_data_cache = {}
|
|
self.is_code_cache = {}
|
|
self.is_bss_cache = {}
|
|
|
|
def add_file(self, disasmPath, readPath, name, vaddr):
|
|
self.files.append(self.File(name, disasmPath, read_file(readPath + '/' + name), vaddr))
|
|
self.files = sorted(self.files, key = lambda file: file.vaddr)
|
|
self.reset_cache()
|
|
|
|
def add_object(self, addr):
|
|
self.objects.add(addr)
|
|
|
|
def add_function(self, addr):
|
|
self.functions.add(addr)
|
|
|
|
def add_variable(self, addr, size):
|
|
# TODO special case this value that is mis-identified as an address and causes problems by being in the middle of a pointer
|
|
if addr == 0x80AAB3AE:
|
|
return
|
|
self.vars.add(addr)
|
|
bisect.insort(self.vars_sorted, addr)
|
|
self.vars_length[addr] = size
|
|
|
|
def add_label(self, addr):
|
|
self.labels.add(addr)
|
|
|
|
def add_data_region(self, start, end, file_name):
|
|
self.data_regions.append((start, end, file_name))
|
|
self.data_regions = sorted(self.data_regions, key = lambda region: region[0])
|
|
self.reset_cache()
|
|
|
|
def add_bss_region(self, start, end, file_name):
|
|
self.bss_regions.append((start, end, file_name))
|
|
self.bss_regions = sorted(self.bss_regions, key = lambda region: region[0])
|
|
self.reset_cache()
|
|
|
|
def set_auto_analysis(self, setting):
|
|
self.auto_analysis = setting
|
|
|
|
def is_in_data(self, addr):
|
|
if addr in self.is_data_cache:
|
|
return self.is_data_cache[addr]
|
|
|
|
start = 0
|
|
last = len(self.data_regions) - 1
|
|
while start <= last:
|
|
midpoint = (start + last) // 2
|
|
if addr >= self.data_regions[midpoint][0]:
|
|
if addr <= self.data_regions[midpoint][1]:
|
|
self.is_data_cache[addr] = True
|
|
return True
|
|
else:
|
|
start = midpoint + 1
|
|
else:
|
|
last = midpoint - 1
|
|
|
|
self.is_data_cache[addr] = False
|
|
return False
|
|
|
|
def is_in_code(self, addr):
|
|
if addr in self.is_code_cache:
|
|
return self.is_code_cache[addr]
|
|
|
|
start = 0
|
|
last = len(self.files) - 1
|
|
while start <= last:
|
|
midpoint = (start + last) // 2
|
|
if addr >= self.files[midpoint].vaddr:
|
|
if addr < (self.files[midpoint].vaddr + self.files[midpoint].size):
|
|
self.is_code_cache[addr] = not self.is_in_data(addr)
|
|
return self.is_code_cache[addr]
|
|
else:
|
|
start = midpoint + 1
|
|
else:
|
|
last = midpoint - 1
|
|
|
|
self.is_code_cache[addr] = False
|
|
return False
|
|
|
|
def is_in_bss(self, addr):
|
|
if addr in self.is_bss_cache:
|
|
return self.is_bss_cache[addr]
|
|
|
|
start = 0
|
|
last = len(self.bss_regions) - 1
|
|
while start <= last:
|
|
midpoint = (start + last) // 2
|
|
if addr >= self.bss_regions[midpoint][0]:
|
|
if addr <= self.bss_regions[midpoint][1]:
|
|
self.is_bss_cache[addr] = True
|
|
return True, self.bss_regions[midpoint]
|
|
else:
|
|
start = midpoint + 1
|
|
else:
|
|
last = midpoint - 1
|
|
|
|
self.is_bss_cache[addr] = False
|
|
return False, None
|
|
|
|
def is_in_data_or_undef(self, addr):
|
|
# return true if it is in a defined data region
|
|
if self.is_in_data(addr):
|
|
return True
|
|
|
|
# otherwise return false if it is in a file's bounds
|
|
if self.is_in_code(addr):
|
|
return False
|
|
|
|
# otherwise it is undefined (return true)
|
|
return True
|
|
|
|
def is_start_of_variable(self, addr):
|
|
return addr in self.vars
|
|
|
|
def get_variable_offset(self, addr):
|
|
if len(self.vars_sorted) == 0:
|
|
return None
|
|
if self.is_start_of_variable(addr):
|
|
return (addr, 0)
|
|
nearest = self.vars_sorted[bisect.bisect_left(self.vars_sorted, addr)-1]
|
|
offset = addr - nearest
|
|
if offset < self.vars_length[nearest]:
|
|
return (nearest, offset)
|
|
return None
|
|
|
|
def make_label(self, imm, cur):
|
|
addr = (imm*4) + cur + 4
|
|
self.add_label(addr)
|
|
return ".L%08X" % addr
|
|
|
|
def make_func(self, imm, cur):
|
|
addr = (imm*4) + (cur & 0xF0000000)
|
|
self.add_function(addr)
|
|
return get_func_name(addr)
|
|
|
|
def make_load(self, addr):
|
|
if self.is_in_data_or_undef(addr):
|
|
return get_symbol_name(addr)
|
|
else:
|
|
return get_func_name(addr)
|
|
|
|
# TODO refactor to remove file_addr
|
|
def get_object_name(self, addr, file_addr):
|
|
filename = ""
|
|
|
|
for file in self.files:
|
|
if file_addr == file.vaddr:
|
|
filename = file.name
|
|
|
|
if filename == "":
|
|
print("Bad file_addr passed to get_object_name: 0x%0X" % addr)
|
|
return
|
|
|
|
if SPLIT_FILES:
|
|
if addr in known_objects and known_objects[addr] != "": # no name means object boundary is known but not the name
|
|
return known_objects[addr]
|
|
else:
|
|
return '%s_0x%08X' % (filename, addr)
|
|
else:
|
|
return "%s" % filename
|
|
|
|
def guess_functions_and_variables_from_data(self):
|
|
for file in self.files:
|
|
for i in range(0, file.size // 4):
|
|
word = file.get_inst(i)
|
|
addr = file.vaddr + i*4
|
|
if self.is_in_data(addr):
|
|
if self.is_in_code(word) and (word % 4) == 0:
|
|
None
|
|
# TODO functions are disabled for now due to behaving poorly with switches. This should be a flag.
|
|
#self.add_function(word)
|
|
elif self.is_in_data(word):
|
|
self.add_variable(word, 1)
|
|
|
|
|
|
def disassemble(self, path):
|
|
self.first_pass()
|
|
self.second_pass(path)
|
|
|
|
def first_pass(self):
|
|
if self.has_done_first_pass == True:
|
|
return
|
|
|
|
for file in self.files:
|
|
for i in range(0, file.size // 4):
|
|
inst = file.get_inst(i)
|
|
addr = file.vaddr + i*4
|
|
if not self.is_in_data_or_undef(addr):
|
|
self.disassemble_inst(inst, addr, i, file)
|
|
|
|
if inst == 0x03E00008 and self.auto_analysis: # jr $ra
|
|
next_index = i+2
|
|
if file.get_inst(next_index) == 0: # nop
|
|
while file.get_inst(next_index) == 0:
|
|
next_index += 1
|
|
|
|
new_object_start = file.vaddr + next_index*4 + 15
|
|
new_object_start -= new_object_start % 16
|
|
|
|
# don't split if it's the start of a data section, it's probably the same object
|
|
if not self.is_in_data_or_undef(new_object_start):
|
|
self.add_object(new_object_start)
|
|
if addr in self.vars:
|
|
name = self.make_load(addr)
|
|
if name.startswith("jtbl_"):
|
|
addr_i = i
|
|
case_addr = file.get_inst(addr_i)
|
|
while self.is_in_code(case_addr):
|
|
self.switch_cases.add(case_addr)
|
|
addr_i += 1
|
|
if addr_i >= (file.size // 4):
|
|
break
|
|
case_addr = file.get_inst(addr_i)
|
|
if self.auto_analysis:
|
|
self.guess_functions_and_variables_from_data()
|
|
self.has_done_first_pass = True
|
|
|
|
def build_disassembled_path(self, basePath, addr, file):
|
|
return basePath + '/' + file.disasmPath + '%s.asm' % self.get_object_name(addr, file.vaddr)
|
|
|
|
def second_pass(self, path):
|
|
for file in self.files:
|
|
filename = self.build_disassembled_path(path, file.vaddr, file)
|
|
|
|
with open(filename, 'w') as f:
|
|
write_header(f, self.is_in_data_or_undef(file.vaddr))
|
|
|
|
for i in range(0, file.size // 4):
|
|
inst = file.get_inst(i)
|
|
addr = file.vaddr + i*4
|
|
|
|
if addr in self.objects and SPLIT_FILES:
|
|
f.close()
|
|
filename = self.build_disassembled_path(path, addr, file)
|
|
f = open(filename, 'w')
|
|
write_header(f, self.is_in_data_or_undef(addr))
|
|
|
|
if addr in self.labels and addr not in self.switch_cases:
|
|
f.write(".L%08X:\n" % addr)
|
|
if addr in self.switch_cases:
|
|
f.write("glabel L%08X\n" % addr)
|
|
f.write(".L%08X:\n" % addr)
|
|
if addr in self.functions:
|
|
name = get_func_name(addr)
|
|
f.write("\nglabel %s\n" % name)
|
|
|
|
if not self.is_in_data_or_undef(addr):
|
|
f.write("/* %06d 0x%08X %08X */ %s\n" % (i, addr, inst, self.disassemble_inst(inst, addr, i, file)))
|
|
elif inst in self.functions:
|
|
if self.is_start_of_variable(addr):
|
|
name = self.make_load(addr)
|
|
f.write("glabel %s\n" % name)
|
|
f.write("/* %06d 0x%08X */ .word\t%s\n" % (i, addr, get_func_name(inst)))
|
|
elif inst in self.switch_cases:
|
|
if self.is_start_of_variable(addr):
|
|
name = self.make_load(addr)
|
|
f.write("glabel %s\n" % name)
|
|
f.write("/* %06d 0x%08X */ .word\tL%08X\n" % (i, addr, inst))
|
|
elif self.is_start_of_variable(inst):
|
|
if self.is_start_of_variable(addr):
|
|
name = self.make_load(addr)
|
|
f.write("glabel %s\n" % name)
|
|
f.write("/* %06d 0x%08X */ .word\t%s\n" % (i, addr, self.make_load(inst)))
|
|
elif inst >= 0x801F0568 and inst < 0x801F0684:
|
|
# XXX special case gSaveContext.weekEventReg because there are pointers to fields of it in other parts of dara
|
|
# TODO think of a better way to do this
|
|
if self.is_start_of_variable(addr):
|
|
name = self.make_load(addr)
|
|
f.write("glabel %s\n" % name)
|
|
var = self.get_variable_offset(inst)
|
|
f.write("/* %06d 0x%08X */ .word\t(%s + 0x%08X)\n" % (i, addr, self.make_load(var[0]), var[1]))
|
|
else:
|
|
# TODO this should be moved into a print_data_range function or something
|
|
print_head = addr
|
|
data_stream = inst
|
|
while print_head < addr + 4:
|
|
if self.is_start_of_variable(print_head):
|
|
name = self.make_load(print_head)
|
|
f.write("glabel %s\n" % name)
|
|
if self.is_start_of_variable(print_head+1) or print_head % 2 != 0:
|
|
f.write("/* %06d 0x%08X */ .byte\t0x%02X\n" % (i, addr, (data_stream >> 24) & 0xFF))
|
|
data_stream <<= 8
|
|
print_head += 1
|
|
elif self.is_start_of_variable(print_head+2) or self.is_start_of_variable(print_head+3) or print_head % 4 != 0:
|
|
f.write("/* %06d 0x%08X */ .short\t0x%04X\n" % (i, addr, (data_stream >> 16) & 0xFFFF))
|
|
data_stream <<= 16
|
|
print_head += 2
|
|
else:
|
|
f.write("/* %06d 0x%08X */ .word\t0x%08X\n" % (i, addr, data_stream & 0xFFFFFFFF))
|
|
data_stream <<= 32
|
|
print_head += 4
|
|
|
|
def determine_load_ref_impl(self, file, inst_i, start_i, depth, from_branch=False, visited=set()):
|
|
candidates = []
|
|
|
|
if not from_branch:
|
|
visited.clear()
|
|
|
|
# debug_i = 5217
|
|
debug_i = 0xFFFFFFFF
|
|
|
|
if inst_i == debug_i:
|
|
print("{} {} {}".format(inst_i, start_i, from_branch))
|
|
|
|
if depth <= 0:
|
|
return candidates
|
|
# TODO better detect when the register gets dirty
|
|
cur_inst = file.get_inst(inst_i)
|
|
|
|
addr_high = get_imm(cur_inst)
|
|
if (addr_high > 0x80C2) or (addr_high < 0x8000):
|
|
return candidates
|
|
|
|
if get_op(cur_inst) != 15:
|
|
return candidates
|
|
|
|
# set state based on previous instruction
|
|
# TODO cleanup
|
|
if start_i == 0 or from_branch:
|
|
prev_was_jump = False
|
|
prev_was_ret = False
|
|
prev_was_branch = False
|
|
prev_was_branch_f = False
|
|
prev_was_branch_f_likely = False
|
|
prev_was_branch_likely = False
|
|
prev_was_branch_always = False
|
|
prev_target = 0
|
|
else:
|
|
prev_inst = file.get_inst(start_i - 1)
|
|
prev_op = get_op(prev_inst)
|
|
prev_was_jump = (prev_op == 2 or prev_op == 3)
|
|
prev_was_ret = (prev_op == 0 and get_func(prev_inst) == 8)
|
|
prev_was_branch = (prev_op == 4 or prev_op == 5 or prev_op == 6 or prev_op == 7) or \
|
|
(prev_op == 1 and (get_rt(prev_inst) == 0 or get_rt(prev_inst) == 1))
|
|
prev_was_branch_f = ((prev_op == 16) or (prev_op == 17) or (prev_op == 18)) and (get_rs(prev_inst) == 8) and ((prev_inst & (1 << 17)) == 0)
|
|
prev_was_branch_f_likely = ((prev_op == 16) or (prev_op == 17) or (prev_op == 18)) and (get_rs(prev_inst) == 8) and ((prev_inst & (1 << 17)) != 0)
|
|
prev_was_branch_always = prev_op == 4 and get_rs(prev_inst) == get_rt(prev_inst) == 0
|
|
prev_was_branch_likely = (prev_op == 20 or prev_op == 21 or prev_op == 22 or prev_op == 23) or \
|
|
(prev_op == 1 and (get_rt(prev_inst) == 2 or get_rt(prev_inst) == 3))
|
|
prev_target = get_signed_imm(prev_inst) * 4 + (file.vaddr + (start_i-1)*4) + 4
|
|
|
|
if prev_was_branch or prev_was_branch_f or prev_was_branch_likely or prev_was_branch_f_likely:
|
|
if inst_i == debug_i:
|
|
print("branch start")
|
|
branch_candidates = self.determine_load_ref_impl(file, inst_i, (prev_target - file.vaddr) // 4, depth, True, visited)
|
|
candidates += branch_candidates
|
|
if inst_i == debug_i:
|
|
print("branch end, found {}".format(branch_candidates))
|
|
|
|
|
|
continue_branch = not (prev_was_branch_likely or prev_was_branch_f_likely)
|
|
|
|
prev_was_jump = False
|
|
prev_was_ret = False
|
|
prev_was_branch = False
|
|
prev_was_branch_f = False
|
|
prev_was_branch_f_likely = False
|
|
prev_was_branch_likely = False
|
|
prev_was_branch_always = False
|
|
prev_target = 0
|
|
|
|
inst_read_addr = start_i if from_branch else start_i + 1
|
|
|
|
for i in range(1, depth + 1):
|
|
if not continue_branch:
|
|
break
|
|
|
|
if inst_read_addr in visited:
|
|
break
|
|
|
|
if inst_i == debug_i:
|
|
print(" {}".format(inst_read_addr))
|
|
|
|
if self.is_in_data(file.vaddr + inst_read_addr*4):
|
|
break
|
|
|
|
next_inst = file.get_inst(inst_read_addr)
|
|
next_op = get_op(next_inst)
|
|
next_func = get_func(next_inst)
|
|
|
|
# TODO consolidate
|
|
if (next_op == 9) and (get_rt(cur_inst) == get_rs(next_inst)): # lui + addiu (move pointer)
|
|
addr = (get_imm(cur_inst) << 16) + get_signed_imm(next_inst)
|
|
|
|
# TODO workaround to avoid classifying loading constants as loading pointers
|
|
# This unfortunately causes it to not detect object addresses
|
|
if addr > 0x80000000:
|
|
var = self.get_variable_offset(addr)
|
|
|
|
if var == None:
|
|
if self.auto_analysis:
|
|
if self.is_in_data_or_undef(addr):
|
|
self.add_variable(addr, 1)
|
|
var = (addr, 0)
|
|
else:
|
|
self.add_function(addr)
|
|
var = (addr, 0)
|
|
elif addr in self.functions:
|
|
var = (addr, 0)
|
|
|
|
if var != None:
|
|
candidates.append((file.vaddr + inst_read_addr*4, var[0], var[1]))
|
|
|
|
if get_rt(cur_inst) == get_rt(next_inst):
|
|
if prev_was_branch_likely or prev_was_branch_f_likely:
|
|
prev_was_branch_likely = False
|
|
prev_was_branch_f_likely = False
|
|
else:
|
|
prev_was_branch = False
|
|
prev_was_branch_f = False
|
|
prev_was_branch_always = False
|
|
continue_branch = False
|
|
|
|
elif is_load(next_inst) and (get_rt(cur_inst) == get_rs(next_inst)): # lui + load (load pointer)
|
|
addr = (get_imm(cur_inst) << 16) + get_signed_imm(next_inst)
|
|
|
|
# TODO workaround to avoid classifying loading constants as loading pointers
|
|
# This unfortunately causes it to not detect object addresses
|
|
if addr > 0x80000000:
|
|
var = self.get_variable_offset(addr)
|
|
|
|
if var == None:
|
|
if self.auto_analysis:
|
|
if self.is_in_data_or_undef(addr):
|
|
self.add_variable(addr, 1)
|
|
var = (addr, 0)
|
|
else:
|
|
print("Warning: Pointer load location is in code 0x%08X @ 0x%08X, base=0x%08X" % (addr, file.vaddr + inst_read_addr*4, file.vaddr + inst_i*4))
|
|
self.add_function(addr)
|
|
var = (addr, 0)
|
|
elif addr in self.functions:
|
|
var = (addr, 0)
|
|
|
|
if var != None:
|
|
candidates.append((file.vaddr + inst_read_addr*4, var[0], var[1]))
|
|
|
|
if get_rt(cur_inst) == get_rt(next_inst):
|
|
if prev_was_branch_likely or prev_was_branch_f_likely:
|
|
prev_was_branch_likely = False
|
|
prev_was_branch_f_likely = False
|
|
else:
|
|
prev_was_branch = False
|
|
prev_was_branch_f = False
|
|
prev_was_branch_always = False
|
|
continue_branch = False
|
|
|
|
elif ((next_op >= 32) and (next_op < 48)) and (get_rt(cur_inst) == get_rt(next_inst)): # load that overwrites the reg () exclude fp loads
|
|
if prev_was_branch_likely or prev_was_branch_f_likely:
|
|
prev_was_branch_likely = False
|
|
prev_was_branch_f_likely = False
|
|
else:
|
|
prev_was_branch = False
|
|
prev_was_branch_f = False
|
|
prev_was_branch_always = False
|
|
continue_branch = False
|
|
|
|
elif ((next_op == 8) or (next_op == 9) or (next_op == 15)) and \
|
|
(get_rt(cur_inst) == get_rt(next_inst)):
|
|
if prev_was_branch_likely or prev_was_branch_f_likely:
|
|
prev_was_branch_likely = False
|
|
prev_was_branch_f_likely = False
|
|
else:
|
|
prev_was_branch = False
|
|
prev_was_branch_f = False
|
|
prev_was_branch_always = False
|
|
continue_branch = False
|
|
|
|
# TODO more funcs?
|
|
elif next_op == 0 and \
|
|
((next_func == 0) or (next_func == 2) or (next_func == 3) or \
|
|
(next_func == 24) or (next_func == 25) or (next_func == 26) or (next_func == 27) or \
|
|
(next_func == 32) or (next_func == 33) or (next_func == 34) or (next_func == 35) or \
|
|
(next_func == 36) or (next_func == 37) or (next_func == 38) or (next_func == 39) or \
|
|
(next_func == 42) or (next_func == 43)) and \
|
|
(get_rt(cur_inst) == get_rd(next_inst) and \
|
|
(get_rt(cur_inst) != get_rt(next_inst)) and (get_rt(cur_inst) != get_rs(next_inst))):
|
|
if prev_was_branch_likely or prev_was_branch_f_likely:
|
|
prev_was_branch_likely = False
|
|
prev_was_branch_f_likely = False
|
|
else:
|
|
prev_was_branch = False
|
|
prev_was_branch_f = False
|
|
prev_was_branch_always = False
|
|
continue_branch = False
|
|
|
|
visited.add(inst_read_addr)
|
|
|
|
if prev_was_branch or prev_was_branch_f or prev_was_branch_likely or prev_was_branch_f_likely:
|
|
if inst_i == debug_i:
|
|
print("branch start")
|
|
branch_candidates = self.determine_load_ref_impl(file, inst_i, (prev_target - file.vaddr) // 4, depth-i, True, visited)
|
|
candidates += branch_candidates
|
|
if inst_i == debug_i:
|
|
print("branch end, found {}".format(branch_candidates))
|
|
|
|
# if this is a jump, mark to return after we evaluate the following instruction
|
|
if prev_was_jump or prev_was_ret:
|
|
continue_branch = False
|
|
|
|
if prev_was_branch_always:
|
|
continue_branch = False
|
|
|
|
prev_was_jump = (next_op == 2 or next_op == 3)
|
|
prev_was_ret = (next_op == 0 and get_func(next_inst) == 8)
|
|
prev_was_branch = (next_op == 4 or next_op == 5 or next_op == 6 or next_op == 7) or \
|
|
(next_op == 1 and (get_rt(next_inst) == 0 or get_rt(next_inst) == 1))
|
|
prev_was_branch_f = ((next_op == 16) or (next_op == 17) or (next_op == 18)) and (get_rs(next_inst) == 8) and ((next_inst & (1 << 17)) == 0)
|
|
prev_was_branch_f_likely = ((next_op == 16) or (next_op == 17) or (next_op == 18)) and (get_rs(next_inst) == 8) and ((next_inst & (1 << 17)) != 0)
|
|
prev_was_branch_always = next_op == 4 and get_rs(next_inst) == get_rt(next_inst) == 0
|
|
prev_was_branch_likely = (next_op == 20 or next_op == 21 or next_op == 22 or next_op == 23) or \
|
|
(next_op == 1 and (get_rt(next_inst) == 2 or get_rt(next_inst) == 3))
|
|
prev_target = get_signed_imm(next_inst)*4 + (file.vaddr + inst_read_addr*4) + 4
|
|
|
|
inst_read_addr += 1
|
|
|
|
return candidates
|
|
|
|
def determine_load_ref(self, file, inst_i):
|
|
candidates = self.determine_load_ref_impl(file, inst_i, inst_i, 200)
|
|
if len(candidates) > 0:
|
|
first = candidates[0] # TODO multiple candidates
|
|
loadHighRefs[file.vaddr + inst_i*4] = (first[1], first[2])
|
|
loadLowRefs[first[0]] = (first[1], first[2])
|
|
for condidate in candidates[1:]:
|
|
loadLowRefs[condidate[0]] = (condidate[1], condidate[2])
|
|
|
|
def disassemble_inst(self, inst, addr, i, file):
|
|
if inst == 0:
|
|
return "nop"
|
|
|
|
dis = ""
|
|
op_num = get_op(inst)
|
|
|
|
if op_num == 0:
|
|
func = get_func(inst)
|
|
if func == 1:
|
|
cc = (inst & (7 << 18)) >> 18
|
|
if (inst & (1 << 16)) == 0:
|
|
dis += "movf\t%s, %s, %d" % (regs[get_rd(inst)], regs[get_rs(inst)], cc)
|
|
else:
|
|
dis += "movt\t%s, %s, %d" % (regs[get_rd(inst)], regs[get_rs(inst)], cc)
|
|
else:
|
|
if func not in funcs:
|
|
dis += "func_error: %d" % func
|
|
else:
|
|
if func == 37 and get_rt(inst) == 0: # or with zero reg is move
|
|
return "move\t%s, %s" % (regs[get_rd(inst)], regs[get_rs(inst)])
|
|
dis += "%s\t" % funcs[func]
|
|
if func == 0 or func == 2 or func == 3 or func == 56 or func == 59 or func == 60 or func == 63: # sll, srl, sra, dsll, dsra dsll32, dsra32
|
|
dis += "%s, %s, %d" % (regs[get_rd(inst)], regs[get_rt(inst)], get_shift(inst))
|
|
elif func == 4 or func == 6 or func == 7: # sllv, srlv, srav
|
|
dis += "%s, %s, %s" % (regs[get_rd(inst)], regs[get_rt(inst)], regs[get_rs(inst)])
|
|
elif func == 8 or func == 9: # jr, jalr
|
|
dis += "%s" % regs[get_rs(inst)]
|
|
elif func == 13: # break
|
|
dis += "0x%05X" % ((inst & (0xFFFFF << 6)) >> 16) # TODO the error code is 20 bits in the manual, why does gas want something else?
|
|
elif func == 16 or func == 18: # mfhi, mflo
|
|
dis += "%s" % regs[get_rd(inst)]
|
|
elif func == 17 or func == 19: # mthi, mtlo
|
|
dis += "%s" % regs[get_rs(inst)]
|
|
elif func == 24 or func == 25 or func == 28 or func == 29: # mult, multu, dmult, dmultu
|
|
dis += "%s, %s" % (regs[get_rs(inst)], regs[get_rt(inst)])
|
|
elif func == 26 or func == 27 or func == 30 or func == 31: # div, divu, ddiv, ddivu
|
|
dis += "$zero, %s, %s" % (regs[get_rs(inst)], regs[get_rt(inst)]) # TODO why does this need $zero for gas to not think it's a macro?
|
|
elif func == 34 and get_rs(inst) == 0: # sub with $zero is neg
|
|
dis = "neg\t%s, %s" % (regs[get_rd(inst)], regs[get_rt(inst)])
|
|
elif func == 35 and get_rs(inst) == 0: # subu with $zero is negu
|
|
dis = "negu\t%s, %s" % (regs[get_rd(inst)], regs[get_rt(inst)])
|
|
elif func == 20 or func == 22 or func == 23: # doubleword ops
|
|
dis += "%s, %s, %s" % (regs[get_rd(inst)], regs[get_rt(inst)], regs[get_rs(inst)])
|
|
else: # add, sub, logical, etc.
|
|
dis += "%s, %s, %s" % (regs[get_rd(inst)], regs[get_rs(inst)], regs[get_rt(inst)])
|
|
# TODO traps
|
|
|
|
elif op_num == 1:
|
|
rt = get_rt(inst)
|
|
if rt not in branch1reg:
|
|
dis += "branch1reg_erro: %d" % rt
|
|
else:
|
|
# TODO traps
|
|
dis += "%s\t%s, %s" % (branch1reg[rt], regs[get_rs(inst)], self.make_label(get_signed_imm(inst), addr))
|
|
|
|
elif op_num == 16 or op_num == 17 or op_num == 18:
|
|
z = op_num - 16
|
|
rs = get_rs(inst)
|
|
if rs == 0:
|
|
dis += "mfc%d\t%s, %s" % (z, regs[get_rt(inst)], float_reg(get_rd(inst)) if z != 0 else "$%d" % get_rd(inst))
|
|
elif rs == 1:
|
|
dis += "dmfc%d\t%s, %s" % (z, regs[get_rt(inst)], float_reg(get_rd(inst)) if z != 0 else "$%d" % get_rd(inst))
|
|
elif rs == 2:
|
|
dis += "cfc%d\t%s, %s" % (z, regs[get_rt(inst)], float_reg(get_rd(inst)) if z != 0 else "$%d" % get_rd(inst))
|
|
elif rs == 4:
|
|
dis += "mtc%d\t%s, %s" % (z, regs[get_rt(inst)], float_reg(get_rd(inst)) if z != 0 else "$%d" % get_rd(inst))
|
|
elif rs == 5:
|
|
dis += "dmtc%d\t%s, %s" % (z, regs[get_rt(inst)], float_reg(get_rd(inst)) if z != 0 else "$%d" % get_rd(inst))
|
|
elif rs == 6:
|
|
dis += "ctc%d\t%s, %s" % (z, regs[get_rt(inst)], float_reg(get_rd(inst)) if z != 0 else "$%d" % get_rd(inst))
|
|
elif rs == 8:
|
|
dis += "bc%d%s%s %s" % (z, "f" if ((inst & (1 << 16)) == 0) else "t", "" if ((inst & (1 << 17)) == 0) else "l", self.make_label(get_signed_imm(inst), addr))
|
|
elif rs == 16 or rs == 17 or rs == 20 or rs == 21:
|
|
if z == 0:
|
|
func = get_func(inst)
|
|
if func == 1:
|
|
dis += "tlbr"
|
|
elif func == 2:
|
|
dis += "tlbwi"
|
|
elif func == 6:
|
|
dis += "tlbwr"
|
|
elif func == 8:
|
|
dis += "tlbp"
|
|
elif func == 24:
|
|
dis += "eret"
|
|
else:
|
|
# TODO deret?
|
|
dis += "cop0_error: %d" % func
|
|
elif z != 1:
|
|
dis += "cop_error: %d" % z
|
|
else:
|
|
if rs == 16:
|
|
f = "s"
|
|
elif rs == 17:
|
|
f = "d"
|
|
elif rs == 20:
|
|
f = "w"
|
|
elif rs == 21:
|
|
f = "l"
|
|
func = get_func(inst)
|
|
if func not in floats:
|
|
dis += "float_error: %d" % func
|
|
else:
|
|
dis += "%s.%s\t" % (floats[func], f)
|
|
if func == 0 or func == 1 or func == 2 or func == 3 or func == 18 or func == 19: # 3 op
|
|
dis += "%s, %s, %s" % (float_reg(get_fd(inst)), float_reg(get_fs(inst)), float_reg(get_ft(inst)))
|
|
elif (func == 4 or func == 5 or func == 6 or func == 7 or func == 8 or func == 9 or func == 10 or func == 11 or func == 12
|
|
or func == 13 or func == 14 or func == 15 or func == 32 or func == 33 or func == 36 or func == 37): # 2 op
|
|
dis += "%s, %s" % (float_reg(get_fd(inst)), float_reg(get_fs(inst)))
|
|
elif func == 50 or func == 60 or func == 62: # c.eq, c.lt, c.le
|
|
dis += "%s, %s" % (float_reg(get_fs(inst)), float_reg(get_ft(inst)))
|
|
else:
|
|
dis += "coproc_error: %d" % rs
|
|
|
|
elif op_num not in ops:
|
|
dis += "error: %d" % op_num
|
|
|
|
else:
|
|
dis += "%s\t" % ops[op_num]
|
|
if op_num == 2 or op_num == 3: # j, jal
|
|
dis += "%s" % self.make_func(inst & 0x3FFFFFF, addr)
|
|
elif op_num == 4 or op_num == 5 or op_num == 20 or op_num == 21: # beq, bne, beql, bnel
|
|
if op_num == 4 and get_rs(inst) == get_rt(inst) == 0: # beq with both zero regs is a branch always (b %label)
|
|
dis = "b\t%s" % self.make_label(get_signed_imm(inst), addr)
|
|
else:
|
|
if get_rt(inst) == 0: # branchs comparing to 0 have a shorthand
|
|
dis = "%s\t" % ("beqz" if op_num == 4 else "bnez" if op_num == 5 else "beqzl" if op_num == 20 else "bnezl")
|
|
dis += "%s, %s" % (regs[get_rs(inst)], self.make_label(get_signed_imm(inst), addr))
|
|
else:
|
|
dis += "%s, %s, %s" % (regs[get_rs(inst)], regs[get_rt(inst)], self.make_label(get_signed_imm(inst), addr))
|
|
elif op_num == 6 or op_num == 7 or op_num == 22 or op_num == 23: # blez, bgtz, blezl, bgtzl
|
|
dis += "%s, %s" % (regs[get_rs(inst)], self.make_label(get_signed_imm(inst), addr))
|
|
elif op_num == 8 or op_num == 9 or op_num == 10 or op_num == 11 or op_num == 24 or op_num == 25: # addi, addiu, slti, sltiu, daddi, daddiu
|
|
if op_num == 9 and get_rs(inst) == 0: # addiu with reg 0 is load immediate (li)
|
|
dis = "li\t%s, %#X" % (regs[get_rt(inst)], get_signed_imm(inst))
|
|
elif op_num == 9 and addr in loadLowRefs: # addiu loading the lower half of a pointer
|
|
ref = loadLowRefs[addr]
|
|
dis += "%s, %s, %%lo(%s)" % (regs[get_rt(inst)], regs[get_rs(inst)], format_ref(self.make_load(ref[0]), ref[1]))
|
|
else:
|
|
dis += "%s, %s, %#X" % (regs[get_rt(inst)], regs[get_rs(inst)], get_signed_imm(inst))
|
|
elif op_num == 12 or op_num == 13 or op_num == 14: # andi, ori, xori
|
|
dis += "%s, %s, %#X" % (regs[get_rt(inst)], regs[get_rs(inst)], get_imm(inst))
|
|
elif op_num == 15: # lui
|
|
if not self.has_done_first_pass:
|
|
self.determine_load_ref(file, i)
|
|
if addr in loadHighRefs: # lui loading the higher half of a pointer
|
|
ref = loadHighRefs[addr]
|
|
dis += "%s, %%hi(%s)" % (regs[get_rt(inst)], format_ref(self.make_load(ref[0]), ref[1]))
|
|
else:
|
|
dis += "%s, 0x%04X" % (regs[get_rt(inst)], get_imm(inst))
|
|
elif (op_num == 32 or op_num == 33 or op_num == 34 or op_num == 35 or op_num == 38 or op_num == 40 or op_num == 41 or
|
|
op_num == 42 or op_num == 42 or op_num == 43 or op_num == 46 or op_num == 55 or op_num == 63): # load/stores
|
|
if addr in loadLowRefs: # loading with immediate forming lower half of pointer
|
|
ref = loadLowRefs[addr]
|
|
dis += "%s, %%lo(%s)(%s)" % (regs[get_rt(inst)], format_ref(self.make_load(ref[0]), ref[1]), regs[get_rs(inst)])
|
|
else:
|
|
dis += "%s, %#X(%s)" % (regs[get_rt(inst)], get_signed_imm(inst), regs[get_rs(inst)])
|
|
elif op_num == 36 or op_num == 37: # lbu, lhu
|
|
if addr in loadLowRefs: # loading with immediate forming lower half of pointer
|
|
ref = loadLowRefs[addr]
|
|
dis += "%s, %%lo(%s)(%s)" % (regs[get_rt(inst)], format_ref(self.make_load(ref[0]), ref[1]), regs[get_rs(inst)])
|
|
else:
|
|
dis += "%s, %#X(%s)" % (regs[get_rt(inst)], get_signed_imm(inst), regs[get_rs(inst)])
|
|
elif (op_num == 49 or op_num == 50 or op_num == 53 or op_num == 54 or op_num == 57 or op_num == 58 or
|
|
op_num == 61 or op_num == 62): # load/store between co-processors
|
|
if addr in loadLowRefs: # loading with immediate forming lower half of pointer
|
|
ref = loadLowRefs[addr]
|
|
dis += "%s, %%lo(%s)(%s)" % (float_reg(get_rt(inst)), format_ref(self.make_load(ref[0]), ref[1]), regs[get_rs(inst)])
|
|
else:
|
|
dis += "%s, %#X(%s)" % (float_reg(get_rt(inst)), get_signed_imm(inst), regs[get_rs(inst)])
|
|
elif op_num == 47: # cache
|
|
if addr in loadLowRefs: # cache op with immediate forming lower half of pointer
|
|
ref = loadLowRefs[addr]
|
|
dis += "0x%02X, %%lo(%s)(%s)" % (get_rt(inst), format_ref(self.make_load(ref[0]), ref[1]), regs[get_rs(inst)])
|
|
else:
|
|
dis += "0x%02X, %#X(%s)" % (get_rt(inst), get_signed_imm(inst), regs[get_rs(inst)])
|
|
|
|
return dis
|
|
|
|
def generate_undefined(self, path):
|
|
self.first_pass() # find functions and variables
|
|
with open(path + "/undef.txt", 'w', newline='\n') as f:
|
|
for addr in sorted(self.vars):
|
|
if addr < 0x80000000:
|
|
continue # Don't print out symbols of dmadata files' vrom addresses. These will be defined in another file.
|
|
|
|
is_in_bss, region = self.is_in_bss(addr)
|
|
if is_in_bss:
|
|
f.write("%s = %s_bss_start + 0x%08X;\n" % (self.make_load(addr), region[2], addr - region[0]))
|
|
elif not self.is_in_data(addr):
|
|
f.write("%s = 0x%08X;\n" % (self.make_load(addr), addr))
|
|
|
|
# TODO -a --analyze flag? Only when its set will new symbols be added, otherwise use only the supplied ones
|
|
if __name__ == "__main__":
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument('-u', '--undefined', help='create linker script for undefined symbols', metavar='path')
|
|
parser.add_argument('-d', '--disassemble', help='disassemble supplied code files', metavar='path')
|
|
parser.add_argument('-l', '--files', help='list of files to disassemble', metavar='filename', action='append')
|
|
parser.add_argument('-f', '--functions', help='predefined functions', metavar='filename', action='append')
|
|
parser.add_argument('-o', '--objects', help='predefined code objects', metavar='filename', action='append')
|
|
parser.add_argument('-v', '--variables', help='predefined variables', metavar='filename', action='append')
|
|
parser.add_argument('-a', '--auto-analysis', help='automatically find pointers and functions', action='store_true', default=False)
|
|
args = parser.parse_args()
|
|
|
|
for files_file in args.files:
|
|
with open(files_file, 'r') as f:
|
|
known_files = ast.literal_eval(f.read())
|
|
for function_file in args.functions:
|
|
with open(function_file, 'r') as f:
|
|
known_funcs = ast.literal_eval(f.read())
|
|
for object_file in args.objects:
|
|
with open(object_file, 'r') as f:
|
|
known_objects = ast.literal_eval(f.read())
|
|
for var_file in args.variables:
|
|
with open(var_file, 'r') as f:
|
|
known_vars.update(ast.literal_eval(f.read()))
|
|
|
|
dis = Disassembler()
|
|
dis.load_defaults() # TODO file loading code should go in here
|
|
|
|
if args.auto_analysis:
|
|
dis.set_auto_analysis(True)
|
|
|
|
if args.disassemble != None:
|
|
os.makedirs(args.disassemble, exist_ok=True)
|
|
os.makedirs(args.disassemble + "/boot", exist_ok=True)
|
|
os.makedirs(args.disassemble + "/code", exist_ok=True)
|
|
os.makedirs(args.disassemble + "/overlays", exist_ok=True)
|
|
dis.disassemble(args.disassemble)
|
|
if args.undefined != None:
|
|
os.makedirs(args.undefined, exist_ok=True)
|
|
dis.generate_undefined(args.undefined)
|
|
|