From 0418ad11d3772f8c5549dbf9a792c902b3eccd34 Mon Sep 17 00:00:00 2001 From: rozlette Date: Sun, 28 Oct 2018 01:43:30 -0500 Subject: [PATCH] Cleanup and fixed build --- disasm.py | 142 ++++++++++++++++++++++---------------------- functions.py | 4 +- include/functions.h | 8 +-- include/variables.h | 3 +- undef.txt | 1 + 5 files changed, 78 insertions(+), 80 deletions(-) diff --git a/disasm.py b/disasm.py index 69d4a836b0..f12254faf8 100644 --- a/disasm.py +++ b/disasm.py @@ -58,14 +58,14 @@ floats = { def read_file(name): file_data=[] - + try: with open(name, 'rb') as f: file_data = f.read() except IOError: print('failed to read file ' + name) return file_data - + def float_reg(num): if num == 31: @@ -119,10 +119,10 @@ def get_signed_imm(inst): imm = -2**15 + (imm & 0b00000000000000000111111111111111) return imm - + def is_load(inst): return get_op(inst) > 31 - + def get_func_name(addr): if addr in known_funcs: @@ -136,26 +136,26 @@ def get_symbol_name(addr): return known_vars[addr][0] else: return "D_%08X" % addr - - + + def write_header(file): file.write(".set noat # allow use of $at\n.set noreorder # don't insert nops after branches\n.set gp=64 # allow use of 64bit registers\n\n"); - - + + # TODO add code_regions? class Disassembler: - - class File: + + class File: def __init__(self, name, data, vaddr): self.name = name self.data = data self.vaddr = vaddr self.size = len(data) - + def get_inst(self, num): offset = num*4 return struct.unpack('>I', self.data[offset:offset+4])[0] - + def __init__(self): self.files = list() self.objects = set() @@ -163,10 +163,10 @@ class Disassembler: self.labels = set() self.vars = set() self.data_regions = list() - + self.is_data_cache = {} self.is_code_cache = {} - + def load_defaults(self): for file in known_files: self.add_file(file[0], file[1], file[2]) @@ -181,36 +181,36 @@ class Disassembler: for addr in known_objects: self.add_object(addr) self.add_function(addr) # assume every object starts with a function - + for addr in known_vars: self.add_variable(addr) - + def add_file(self, path, name, vaddr): self.files.append(self.File(name, read_file(path), vaddr)) self.is_data_cache = {} self.is_code_cache = {} - + def add_object(self, addr): self.objects.add(addr) - + def add_function(self, addr): self.functions.add(addr) - + def add_variable(self, addr): self.vars.add(addr) - + def add_label(self, addr): self.labels.add(addr) - + def add_data_region(self, start, end): self.data_regions.append((start, end)) self.is_data_cache = {} self.is_code_cache = {} - + def is_in_data(self, addr): if addr in self.is_data_cache: return self.is_data_cache[addr] - + start = 0; last = len(self.data_regions) - 1 while start <= last: @@ -223,14 +223,14 @@ class Disassembler: start = midpoint + 1 else: last = midpoint - 1 - + self.is_data_cache[addr] = False return False - + def is_in_code(self, addr): if addr in self.is_code_cache: return self.is_code_cache[addr] - + start = 0; last = len(self.files) - 1 while start <= last: @@ -243,22 +243,22 @@ class Disassembler: start = midpoint + 1 else: last = midpoint - 1 - + self.is_code_cache[addr] = False return False - + def is_in_data_or_undef(self, addr): # return true if it is in a defined data region if self.is_in_data(addr): return True - + # otherwise return false if it is in a file's bounds if self.is_in_code(addr): return False - + # otherwise it is undefined (return true) return True - + def make_label(self, imm, cur): addr = (imm*4) + cur + 4 self.add_label(addr) @@ -274,11 +274,11 @@ class Disassembler: return get_symbol_name(addr) else: return get_func_name(addr) - - # TODO refactor to remove file_addr + + # TODO refactor to remove file_addr def get_object_name(self, addr, file_addr): filename = ""; - + for file in self.files: if file_addr == file.vaddr: filename = file.name @@ -286,7 +286,7 @@ class Disassembler: if filename == "": print("Bad file_addr passed to get_object_name: 0x%0X" % addr) return - + if SPLIT_FILES: if addr in known_objects and known_objects[addr] != "": # no name means object boundary is known but not the name return known_objects[addr] @@ -294,7 +294,7 @@ class Disassembler: return '%s_0x%08X' % (filename, addr) else: return "%s" % filename - + def guess_functions_from_data(self): for file in self.files: for i in range(0, file.size // 4): @@ -302,18 +302,18 @@ class Disassembler: addr = file.vaddr + i*4 if self.is_in_data(addr) and self.is_in_code(word): self.add_function(word) - - - + + + def disassemble(self, path): # TODO keep sorted self.files = sorted(self.files, key = lambda file: file.vaddr) self.data_regions = sorted(self.data_regions, key = lambda region: region[0]) - + self.__first_pass() self.guess_functions_from_data() self.__second_pass(path) - + def __first_pass(self): for file in self.files: for i in range(0, file.size // 4): @@ -327,21 +327,21 @@ class Disassembler: if file.get_inst(next_index) == 0: # nop while file.get_inst(next_index) == 0: next_index += 1 - + new_object_start = file.vaddr + next_index*4 + 15 new_object_start -= new_object_start % 16 - + # don't split if it's the start of a data section, it's probably the same object if not self.is_in_data_or_undef(new_object_start): self.add_object(new_object_start) - + def __second_pass(self, path): for file in self.files: filename = path + '/%s.asm' % self.get_object_name(file.vaddr, file.vaddr); - + with open(filename, 'w') as f: write_header(f) - + for i in range(0, file.size // 4): inst = file.get_inst(i) addr = file.vaddr + i*4 @@ -365,23 +365,23 @@ class Disassembler: f.write("/* %06d 0x%08X %08X */ %s\n" % (i, addr, inst, self.disassemble_inst(inst, addr, i, file))) else: f.write("/* %06d 0x%08X */ .word\t0x%08X\n" % (i, addr, inst)) - + def determine_load_ref(self, file, inst_i): # TODO better detect when the register gets dirty pc = file.vaddr + inst_i*4 cur_inst = file.get_inst(inst_i) - + if get_op(cur_inst) != 15: return prev_was_jump = False - + for i in range(1, 7): # TODO find a good limit next_inst = file.get_inst(inst_i + i) if get_op(next_inst) == 15 and get_rt(cur_inst) == get_rt(next_inst): return # return if another lui overwrites reg - + if (get_op(next_inst) == 9) and (get_rt(cur_inst) == get_rt(next_inst) == get_rs(next_inst)): # lui + addiu (move pointer) addr = (get_imm(cur_inst) << 16) + get_signed_imm(next_inst) if self.is_in_data_or_undef(addr): @@ -408,14 +408,14 @@ class Disassembler: return if get_op(next_inst) == 2 or get_op(next_inst) == 3: prev_was_jump = True - + def disassemble_inst(self, inst, addr, i, file): if inst == 0: return "nop" - + dis = "" op_num = get_op(inst) - + if op_num == 0: func = get_func(inst) if func == 1: @@ -456,7 +456,7 @@ class Disassembler: else: # add, sub, logical, etc. dis += "%s, %s, %s" % (regs[get_rd(inst)], regs[get_rs(inst)], regs[get_rt(inst)]) # TODO traps - + elif op_num == 1: rt = get_rt(inst) if rt not in branch1reg: @@ -464,7 +464,7 @@ class Disassembler: else: # TODO traps dis += "%s\t%s, %s" % (branch1reg[rt], regs[get_rs(inst)], self.make_label(get_signed_imm(inst), addr)) - + elif op_num == 16 or op_num == 17 or op_num == 18: z = op_num - 16 rs = get_rs(inst) @@ -523,10 +523,10 @@ class Disassembler: dis += "%s, %s" % (float_reg(get_fs(inst)), float_reg(get_ft(inst))) else: dis += "coproc_error: %d" % rs - + elif op_num not in ops: dis += "error: %d" % op_num - + else: dis += "%s\t" % ops[op_num] if op_num == 2 or op_num == 3: # j, jal @@ -581,26 +581,26 @@ class Disassembler: dis += "0x%02X, %d(%s)" % (get_rt(inst), get_signed_imm(inst), regs[get_rs(inst)]) return dis - + def generate_headers(self, path): with open(path + "functions.h", 'w', newline='\n') as f: f.write("#ifndef _FUNCTIONS_H_\n#define _FUNCTIONS_H_\n\n"); - - f.write('#include \n#include \n#include \n#include \n\n'); - + + f.write('#include \n#include \n#include \n#include \n#include \n\n'); + for addr in sorted(self.functions): if addr in known_funcs: f.write("%s %s(%s); // func_%08X\n" % (known_funcs[addr][1], get_func_name(addr), known_funcs[addr][2], addr)); else: f.write("// UNK_RET %s(UNK_ARGS);\n" % get_func_name(addr)); - + f.write("\n#endif\n"); - + with open(path + "variables.h", 'w', newline='\n') as f: f.write("#ifndef _VARIABLES_H_\n#define _VARIABLES_H_\n\n"); - - f.write('#include \n#include \n#include \n#include \n\n'); - + + f.write('#include \n#include \n#include \n#include \n#include \n\n'); + for addr in sorted(self.vars): if addr in known_vars: f.write("extern %s %s%s; // D_%08X\n" % (known_vars[addr][1], self.make_load(addr), "[]" if known_vars[addr][2] else "", addr)); @@ -610,20 +610,20 @@ class Disassembler: f.write("\n// extra variables needed for one reason or another\n\n"); for (name, var_type) in extra_vars: f.write("extern %s %s;\n" % (var_type, name)); - + f.write("\n#endif\n"); - + with open("undef.txt", 'w', newline='\n') as f: for addr in sorted(self.vars): f.write("%s = 0x%08X;\n" % (self.make_load(addr), addr)); - + # TODO not hard code f.write("func_84001B0C = 0x84001B0C;\nfunc_840010CC = 0x840010CC;\nfunc_84001060 = 0x84001060;\nD_80920340 = 0x80920340;\nD_80922430 = 0x80922430;\n") - + if __name__ == "__main__": dis = Disassembler() dis.load_defaults() dis.disassemble('./asm/') dis.generate_headers('./') - + diff --git a/functions.py b/functions.py index 278f5a6a41..17e625e95e 100644 --- a/functions.py +++ b/functions.py @@ -214,7 +214,7 @@ known_funcs = { 0x800CAF24:("func_800CAF24","UNK_RET","UNK_TYPE"), 0x800CAF38:("func_800CAF38","UNK_RET","UNK_TYPE"), 0x800E03A0:("func_800E03A0","s800E03A0*","s32"), - 0x800E03A0:("func_800E03CC","u8*","void"), + 0x800E03CC:("func_800E03CC","void","u8*"), 0x800E11EC:("func_800E11EC","UNK_RET","UNK_TYPE, UNK_PTR"), 0x800E1374:("func_800E1374","UNK_RET","UNK_TYPE, UNK_PTR, struct s800A5AC0*, UNK_PTR"), 0x800E2928:("func_800E2928","UNK_RET","UNK_TYPE, UNK_TYPE, UNK_PTR"), @@ -225,7 +225,7 @@ known_funcs = { 0x80139894:("func_80139894","UNK_RET","UNK_PTR, UNK_TYPE, UNK_TYPE, UNK_TYPE, UNK_TYPE, UNK_TYPE, UNK_TYPE, f32, f32, f32"), 0x80174BF0:("func_80174BF0","UNK_RET","UNK_TYPE"), 0x8018349C:("func_8018349C","UNK_RET","UNK_TYPE, UNK_TYPE"), - + # ovl_En_Test #0x80862B70:("func_80862B70","void","void* a0"), #0x80862CBC:("func_80862CBC","UNK_RET","UNK_ARGS"), diff --git a/include/functions.h b/include/functions.h index 26bcbeb251..c327f2db47 100644 --- a/include/functions.h +++ b/include/functions.h @@ -1333,8 +1333,8 @@ UNK_RET func_800CAF38(UNK_TYPE); // func_800CAF38 // UNK_RET func_800E0308(UNK_ARGS); // UNK_RET func_800E031C(UNK_ARGS); // UNK_RET func_800E0348(UNK_ARGS); -s800E03A0* func_800E03A0(s32); -void func_800E03CC(u8* a0); +s800E03A0* func_800E03A0(s32); // func_800E03A0 +void func_800E03CC(u8*); // func_800E03CC // UNK_RET func_800E0410(UNK_ARGS); // UNK_RET func_800E04BC(UNK_ARGS); // UNK_RET func_800E04EC(UNK_ARGS); @@ -4416,7 +4416,6 @@ UNK_RET func_8018349C(UNK_TYPE, UNK_TYPE); // func_8018349C // UNK_RET func_801AAA58(UNK_ARGS); // UNK_RET func_801AAA8C(UNK_ARGS); // UNK_RET func_801AAAA0(UNK_ARGS); -// UNK_RET func_801AAAB0(UNK_ARGS); // UNK_RET func_80862B70(UNK_ARGS); // UNK_RET func_80862CBC(UNK_ARGS); // UNK_RET func_80862EDC(UNK_ARGS); @@ -19484,8 +19483,5 @@ UNK_RET func_8018349C(UNK_TYPE, UNK_TYPE); // func_8018349C // UNK_RET func_80C25E38(UNK_ARGS); // UNK_RET func_80C25EF0(UNK_ARGS); // UNK_RET func_80C25F4C(UNK_ARGS); -// UNK_RET func_84001060(UNK_ARGS); -// UNK_RET func_840010CC(UNK_ARGS); -// UNK_RET func_84001B0C(UNK_ARGS); #endif diff --git a/include/variables.h b/include/variables.h index 07a387b072..1fd0b7e46c 100644 --- a/include/variables.h +++ b/include/variables.h @@ -2186,6 +2186,7 @@ extern UNK_TYPE D_8009E624; // D_8009E624 //extern UNK_TYPE D_8009F8B0; //extern UNK_TYPE D_800A0004; //extern UNK_TYPE D_80186028; +//extern UNK_TYPE D_801AAAB0; extern UNK_TYPE D_801ADE80; // D_801ADE80 extern UNK_TYPE D_801ADEAC; // D_801ADEAC extern int D_801ADEB0[]; // D_801ADEB0 @@ -2312,7 +2313,7 @@ extern UNK_TYPE D_801B4610; // D_801B4610 //extern UNK_TYPE D_801B9F04; //extern UNK_TYPE D_801B9F0C; //extern UNK_TYPE D_801B9F10; -extern s800E03A0 D_801B9F20; +extern s800E03A0 D_801B9F20; // D_801B9F20 //extern UNK_TYPE D_801BA200; //extern UNK_TYPE D_801BA240; //extern UNK_TYPE D_801BA258; diff --git a/undef.txt b/undef.txt index 30b90e779f..07ce40216b 100644 --- a/undef.txt +++ b/undef.txt @@ -2177,6 +2177,7 @@ D_8009F8A0 = 0x8009F8A0; D_8009F8B0 = 0x8009F8B0; D_800A0004 = 0x800A0004; D_80186028 = 0x80186028; +D_801AAAB0 = 0x801AAAB0; D_801ADE80 = 0x801ADE80; D_801ADEAC = 0x801ADEAC; D_801ADEB0 = 0x801ADEB0;