From d9034ef09af4d9cfba702911e55c7bfbf3bee2be Mon Sep 17 00:00:00 2001
From: octorock <79596758+octorock@users.noreply.github.com>
Date: Thu, 4 Mar 2021 22:09:44 +0100
Subject: [PATCH] Create parser that resolves the .incbin macros

---
 tools/script_disassembler/incbin_parser.py    |  60 ++++
 .../script_disassembler.py                    | 224 +++++++++---------
 2 files changed, 173 insertions(+), 111 deletions(-)
 create mode 100644 tools/script_disassembler/incbin_parser.py

diff --git a/tools/script_disassembler/incbin_parser.py b/tools/script_disassembler/incbin_parser.py
new file mode 100644
index 00000000..f20543a9
--- /dev/null
+++ b/tools/script_disassembler/incbin_parser.py
@@ -0,0 +1,60 @@
+# This python script reads the data/scripts.s file which contains all the .incbin macros.
+# Then it fetches the corresponding data from the baserom, passes it to the script
+# disassembler and prints an .include directive in place of each resolved .incbin.
+TMC_FOLDER='../..'
+
+import subprocess
+from script_disassembler import disassemble_script
+
+def main():
+    """Print data/scripts.s with every labelled .incbin resolved.
+
+    A label line (one containing ':: @') that is directly followed by a line
+    containing '.incbin' is replaced by a 'DISASM <label>' marker, a call to
+    disassemble_script on the referenced baserom bytes and an .include
+    directive for that label. All other lines are printed unchanged.
+    """
+    # read baserom data
+    with open(f'{TMC_FOLDER}/baserom.gba', 'rb') as baserom:
+        baserom_data = bytearray(baserom.read())
+
+    # read scripts.s with incbins
+    with open(f'{TMC_FOLDER}/data/scripts.s', 'r') as scripts:
+        line = scripts.readline()
+
+        while line:  # readline() returns '' at end of file
+            if ':: @' not in line:
+                print(line, end='')
+                line = scripts.readline()
+                continue
+
+            # this might be a label: look one line ahead for its .incbin
+            incbin_line = scripts.readline()
+
+            if '.incbin' not in incbin_line:
+                # Not a label/.incbin pair. Print only the label line and
+                # examine the lookahead line on the next iteration: it may
+                # itself be a label that is followed by an .incbin.
+                print(line, end='')
+                line = incbin_line
+                continue
+
+            # found a label with incbin
+            label = line.split('::')[0]
+            # operands: file name, start offset, byte count (both parsed as hex)
+            (_, start, length) = incbin_line.split(',')
+            start = int(start, 16)
+            length = int(length, 16)
+
+            # read data from rom
+            data = baserom_data[start:start+length]
+
+            print(f'DISASM {label}')
+            disassemble_script(data)
+
+            # print new include label
+            print(f'.include "data/scripts/{label}.inc"')
+            line = scripts.readline()
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/tools/script_disassembler/script_disassembler.py b/tools/script_disassembler/script_disassembler.py
index 5066ce01..df2b54dc 100644
--- a/tools/script_disassembler/script_disassembler.py +++ b/tools/script_disassembler/script_disassembler.py @@ -26,26 +26,6 @@ def barray_to_u32_hex(barray): integers = struct.unpack('I'*count, barray) return [u32_to_hex(x) for x in integers] -""" -def GetNextScriptCommandWordAfterCommandMetadata(ctx): - return struct.unpack('I', ctx.data[ctx.ptr+2:ctx.ptr+6])[0] - - -def sub_0807E10C(ctx): - meta = GetNextScriptCommandWordAfterCommandMetadata(ctx) - print('function', hex(meta)) - - -def sub_0807E5F8(ctx): - curPtr = start + ctx.ptr - print('current', hex(curPtr)) - # gUnk_02033280.unk_00 |= GetNextScriptCommandWordAfterCommandMetadata(unk2->unk_00.raw); - meta = GetNextScriptCommandWordAfterCommandMetadata(ctx) - print('meta', meta) - newPtr = curPtr | meta - print('Start next script at: ', hex(newPtr)) -""" - commands = [ {'fun': 'nullsub_507', 'params': ''}, {'fun': 'sub_0807E004', 'params': '', 'name': 'start executing scripts'}, @@ -65,127 +45,127 @@ commands = [ {'fun': 'sub_0807E188', 'params': 's'}, {'fun': 'sub_0807E1D8', 'params': 's'}, {'fun': 'sub_0807E4AC', 'params': 's'}, - {'fun': 'sub_0807E200'}, - {'fun': 'sub_0807E220'}, - {'fun': 'sub_0807E240'}, + {'fun': 'sub_0807E200', 'params': 'w'}, + {'fun': 'sub_0807E220', 'params': 'ss'}, + {'fun': 'sub_0807E240', 'params': 's'}, {'fun': 'sub_0807E260', 'params': 's'}, - {'fun': 'sub_0807E280'}, + {'fun': 'sub_0807E280', 'params': 'sss'}, {'fun': 'sub_0807E2A8', 'params': 's'}, {'fun': 'sub_0807E2E4', 'params': ''}, {'fun': 'sub_0807E30C', 'params': ''}, - {'fun': 'sub_0807E390'}, - {'fun': 'sub_0807E3BC'}, - {'fun': 'sub_0807E3E8'}, + {'fun': 'sub_0807E390', 'params': 's'}, + {'fun': 'sub_0807E3BC', 'params': 's'}, + {'fun': 'sub_0807E3E8', 'params': ''}, {'fun': 'sub_0807E40C', 'params': 's'}, - {'fun': 'sub_0807E42C'}, - {'fun': 'sub_0807E48C'}, - {'fun': 'sub_0807E4CC'}, - {'fun': 'sub_0807E4EC'}, - {'fun': 'sub_0807E514'}, - {'fun': 'sub_0807E538'}, - {'fun': 'sub_0807E564'}, - {'fun': 
'sub_0807E584'}, - {'fun': 'sub_0807E5A0'}, - {'fun': 'sub_0807E5CC'}, + {'fun': 'sub_0807E42C', 'params': 'ss'}, + {'fun': 'sub_0807E48C', 'params': 's'}, + {'fun': 'sub_0807E4CC', 'params': 'w'}, + {'fun': 'sub_0807E4EC', 'params': 'w'}, + {'fun': 'sub_0807E514', 'params': 's'}, + {'fun': 'sub_0807E538', 'params':'w'}, + {'fun': 'sub_0807E564', 'params': ''}, + {'fun': 'sub_0807E584', 'params': ''}, + {'fun': 'sub_0807E5A0', 'params': 's'}, + {'fun': 'sub_0807E5CC', 'params': 's'}, {'fun': 'sub_0807E5F8', 'params': 'w'},# 'exec': sub_0807E5F8}, - {'fun': 'sub_0807E610'}, - {'fun': 'sub_0807E628'}, - {'fun': 'sub_0807E634'}, - {'fun': 'sub_0807E644'}, + {'fun': 'sub_0807E610', 'params': 'w'}, + {'fun': 'sub_0807E628', 'params': 's'}, + {'fun': 'sub_0807E634', 'params': 'ss'}, + {'fun': 'sub_0807E644', 'params': 's'}, {'fun': 'sub_0807E650', 'params': 's'}, - {'fun': 'sub_0807E65C'}, - {'fun': 'sub_0807E668'}, - {'fun': 'sub_0807E674'}, + {'fun': 'sub_0807E65C', 'params': 's'}, + {'fun': 'sub_0807E668', 'params': 's'}, + {'fun': 'sub_0807E674', 'params': 's'}, {'fun': 'sub_0807E680', 'params': 's'}, - {'fun': 'sub_0807E690'}, + {'fun': 'sub_0807E690', 'params': 'w'}, {'fun': 'sub_0807E6AC', 'params': 'w'}, {'fun': 'sub_0807E6DC', 'params': ''}, - {'fun': 'sub_0807E700'}, - {'fun': 'sub_0807E72C'}, - {'fun': 'sub_0807E75C'}, - {'fun': 'sub_0807E778'}, - {'fun': 'sub_0807E788'}, - {'fun': 'sub_0807E79C'}, - {'fun': 'nullsub_508'}, - {'fun': 'sub_0807E7B0'}, - {'fun': 'sub_0807E7C4'}, - {'fun': 'sub_0807E7D8'}, - {'fun': 'sub_0807E7EC'}, - {'fun': 'sub_0807E800'}, - {'fun': 'sub_0807E80C'}, - {'fun': 'sub_0807E858'}, + {'fun': 'sub_0807E700', 'params': ''}, + {'fun': 'sub_0807E72C', 'params': ''}, + {'fun': 'sub_0807E75C', 'params': ''}, + {'fun': 'sub_0807E778', 'params': ''}, + {'fun': 'sub_0807E788', 'params': 'w'}, + {'fun': 'sub_0807E79C', 'params': ''}, + {'fun': 'nullsub_508', 'params': 's'}, + {'fun': 'sub_0807E7B0', 'params': ''}, + {'fun': 'sub_0807E7C4', 
'params': ''}, + {'fun': 'sub_0807E7D8', 'params': ''}, + {'fun': 'sub_0807E7EC', 'params': ''}, + {'fun': 'sub_0807E800', 'params': 'w'}, + {'fun': 'sub_0807E80C', 'params': 'w'}, + {'fun': 'sub_0807E858', 'params': 's'}, {'fun': 'sub_0807E864', 'params': ''}, {'fun': 'sub_0807E878', 'params': ''}, - {'fun': 'sub_0807E888'}, - {'fun': 'sub_0807E898'}, + {'fun': 'sub_0807E888', 'params': ''}, + {'fun': 'sub_0807E898', 'params': 'w'}, {'fun': 'sub_0807E8C4', 'params': 'w'}, - {'fun': 'sub_0807E8D4'}, + {'fun': 'sub_0807E8D4', 'params': 's'}, {'fun': 'sub_0807E8E4_0', 'params': ''}, # duplicate {'fun': 'sub_0807E8E4_1', 'params': ''}, # duplicate {'fun': 'sub_0807E8E4_2', 'params': ''}, # duplicate {'fun': 'sub_0807E8E4_3', 'params': ''}, # duplicate - {'fun': 'sub_0807E908'}, - {'fun': 'sub_0807E914'}, - {'fun': 'sub_0807E924'}, + {'fun': 'sub_0807E908', 'params': 's'}, + {'fun': 'sub_0807E914', 'params': 'w'}, + {'fun': 'sub_0807E924', 'params': ''}, {'fun': 'sub_0807E930', 'params': 's'}, {'fun': 'sub_0807E944', 'params': ''}, - {'fun': 'sub_0807E974'}, + {'fun': 'sub_0807E974', 'params': 's'}, {'fun': 'sub_0807E9D4', 'params': ''}, {'fun': 'sub_0807E9DC', 'params': ''}, - {'fun': 'sub_0807E9E4'}, + {'fun': 'sub_0807E9E4', 'params': 's'}, {'fun': 'sub_0807E9F0', 'params': ''}, {'fun': 'sub_0807EA4C', 'params': ''}, - {'fun': 'sub_0807EA88'}, + {'fun': 'sub_0807EA88', 'params': 's'}, {'fun': 'sub_0807EA94', 'params': ''}, {'fun': 'sub_0807EAB4', 'params': 's'}, - {'fun': 'sub_0807EAC0'}, + {'fun': 'sub_0807EAC0', 'params': 's'}, {'fun': 'sub_0807EAD0', 'params': 'ss'}, - {'fun': 'sub_0807EAF0'}, - {'fun': 'sub_0807EB18'}, + {'fun': 'sub_0807EAF0', 'params': 'w'}, + {'fun': 'sub_0807EB18', 'params': ''}, {'fun': 'sub_0807EB28', 'params': 's'}, - {'fun': 'sub_0807EB38'}, + {'fun': 'sub_0807EB38', 'params': ''}, {'fun': 'sub_0807EB44', 'params': 's'}, - {'fun': 'sub_0807EB4C'}, + {'fun': 'sub_0807EB4C', 'params': 'ss'}, {'fun': 'sub_0807EB74', 'params': ''}, {'fun': 
'sub_0807EB8C', 'params': ''}, - {'fun': 'sub_0807EBA8'}, - {'fun': 'sub_0807EBB0'}, + {'fun': 'sub_0807EBA8', 'params': 's'}, + {'fun': 'sub_0807EBB0', 'params': 's'}, {'fun': 'sub_0807EBC0', 'params': 's'}, - {'fun': 'sub_0807EBC8'}, - {'fun': 'sub_0807EBD8'}, - {'fun': 'sub_0807EBF4'}, - {'fun': 'sub_0807EC08'}, - {'fun': 'nullsub_509'}, + {'fun': 'sub_0807EBC8', 'params': 'w'}, + {'fun': 'sub_0807EBD8', 'params': 'w'}, + {'fun': 'sub_0807EBF4', 'params': 'ss'}, + {'fun': 'sub_0807EC08', 'params': ''}, + {'fun': 'nullsub_509', 'params': ''}, {'fun': 'sub_0807EC1C', 'params': 's'}, - {'fun': 'sub_0807EC64'}, - {'fun': 'sub_0807EC94'}, - {'fun': 'sub_0807ECC4'}, + {'fun': 'sub_0807EC64', 'params': 's'}, + {'fun': 'sub_0807EC94', 'params': 's'}, + {'fun': 'sub_0807ECC4', 'params': 's'}, {'fun': 'sub_0807ECF4', 'params': 's'}, - {'fun': 'sub_0807ED24'}, + {'fun': 'sub_0807ED24', 'params': 'sss'}, {'fun': 'sub_0807EDD4', 'params': 'ss'}, - {'fun': 'sub_0807EE04'}, + {'fun': 'sub_0807EE04', 'params': 'ss'}, {'fun': 'sub_0807EE30', 'params': ''}, - {'fun': 'sub_0807EEB4'}, - {'fun': 'sub_0807EEF4'}, - {'fun': 'sub_0807EF3C'}, + {'fun': 'sub_0807EEB4', 'params': ''}, + {'fun': 'sub_0807EEF4', 'params': ''}, + {'fun': 'sub_0807EF3C', 'params': 'ss'}, {'fun': 'sub_0807EF80', 'params': 's'}, - {'fun': 'sub_0807EF90'}, - {'fun': 'sub_0807EFA0'}, + {'fun': 'sub_0807EF90', 'params': 's'}, + {'fun': 'sub_0807EFA0', 'params': 's'}, {'fun': 'sub_0807EFAC', 'params': 's'}, - {'fun': 'sub_0807EFD4'}, + {'fun': 'sub_0807EFD4', 'params': 'w'}, {'fun': 'sub_0807EFE4', 'params': ''}, - {'fun': 'sub_0807EFF4'}, - {'fun': 'sub_0807F004'}, - {'fun': 'sub_0807F010'}, + {'fun': 'sub_0807EFF4', 'params': 's'}, + {'fun': 'sub_0807F004', 'params': 's'}, + {'fun': 'sub_0807F010', 'params': 's'}, {'fun': 'sub_0807F034', 'params': 's'}, - {'fun': 'sub_0807F050'}, - {'fun': 'sub_0807F060'}, - {'fun': 'sub_0807F078'}, - {'fun': 'sub_0807F088'}, + {'fun': 'sub_0807F050', 'params': 's'}, + {'fun': 
'sub_0807F060', 'params': 's'}, + {'fun': 'sub_0807F078', 'params': 'ss'}, + {'fun': 'sub_0807F088', 'params': 's'}, {'fun': 'sub_0807F098', 'params': ''}, - {'fun': 'sub_0807F0A4'}, + {'fun': 'sub_0807F0A4', 'params': ''}, {'fun': 'sub_0807F0B4', 'params': 's'}, - {'fun': 'sub_0807F0C8'} + {'fun': 'sub_0807F0C8', 'params': 'ss'} ] # definitions for parameters @@ -220,6 +200,12 @@ parameters = { 'expr': ' .word \\a\n .word \\b', 'read': lambda ctx: ', '.join(barray_to_u32_hex(ctx.data[ctx.ptr+2:ctx.ptr+10])) }, + 'sss': { + 'length': 3, + 'param': 'a,b,c', + 'expr': ' .short \\a\n .short \\b\n .short \\c', + 'read': lambda ctx: ', '.join(barray_to_u16_hex(ctx.data[ctx.ptr+2:ctx.ptr+8])) + }, } @@ -229,14 +215,20 @@ def ExecuteScriptCommandSet(ctx: Context): return 0 unk_06 = cmd >> 0xA + if unk_06 == 0: + # TODO error + return 0 operationId = cmd & 0x3FF - #print(cmd) - #print('advance by', unk_06) - #print('command id', operationId) + if operationId >= len(commands): + print('ERR: NO CMD') + # TODO error + return 0 command = commands[operationId] - #print(command) param_length = unk_06 - 1 if unk_06 > 1: + if ctx.ptr+2*unk_06 >= len(ctx.data): + # TODO raise Exception(f'Not enough data to fetch {unk_06-1} params') + return 0 meta = struct.unpack( 'H'*(unk_06-1), ctx.data[ctx.ptr+2:ctx.ptr+2*unk_06]) #print('meta', meta) @@ -248,7 +240,11 @@ def ExecuteScriptCommandSet(ctx: Context): raise Exception('Parameter configuration ' + command['params'] + ' not defined') params = parameters[command['params']] if unk_06-1 != params['length']: - raise Exception(f'Call {command["fun"]} with ' + str(unk_06-1) +' length, while length of ' + str(params['length'])+' defined') + # TODOraise Exception(f'Call {command["fun"]} with ' + str(unk_06-1) +' length, while length of ' + str(params['length'])+' defined') + # TEMPORARY WORKAROUND: + print(ctx.ptr, unk_06, cmd) + ctx.ptr += unk_06*2 + return 1 print(command['fun'] + ' ' + params['read'](ctx)) @@ -258,19 +254,24 @@ def 
ExecuteScriptCommandSet(ctx: Context): return 1 -def disassemble_script(input_data): - input_bytes = bytearray.fromhex(input_data) +def disassemble_script(input_bytes): ctx = Context(0, input_bytes) while True: + if ctx.ptr >= len(ctx.data) - 1: # End of file (there need to be at least two bytes remaining for the next operation id) + break if not ExecuteScriptCommandSet(ctx): break # Print rest (did not manage to get there) - - print('\n'.join(['.short ' + x for x in barray_to_u16_hex(ctx.data[ctx.ptr:])])) + if ctx.ptr < len(ctx.data): + if (len(ctx.data) - ctx.ptr) % 2 != 0: + print(ctx.data[ctx.ptr:]) + # TODO error + return + print('\n'.join(['.short ' + x for x in barray_to_u16_hex(ctx.data[ctx.ptr:])])) def generate_macros(): @@ -316,6 +317,7 @@ def main(): if input_data.strip() == 'macros': generate_macros() return - disassemble_script(input_data) + disassemble_script(bytearray.fromhex(input_data)) -main() \ No newline at end of file +if __name__ == '__main__': + main() \ No newline at end of file