Create parser that resolves the .incbin macros

This commit is contained in:
octorock 2021-03-04 22:09:44 +01:00
parent e94df6de4a
commit d9034ef09a
2 changed files with 160 additions and 111 deletions

View File

@ -0,0 +1,47 @@
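# This python script reads the scripts.s file which contains all the .incbin macros.
# Then it fetches the corresponding data from the baserom, disassembles it and prints
# an .include line for each resolved .incbin in place of the original label and macro.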
TMC_FOLDER='../..'
import subprocess
from script_disassembler import disassemble_script
def main():
    """Resolve the labelled .incbin macros of data/scripts.s against the baserom.

    The baserom is read fully into memory, then scripts.s is scanned line by
    line. A label line (one containing ':: @') that is directly followed by
    an .incbin line is replaced on stdout by a 'DISASM <label>' marker, the
    output of disassemble_script() for the referenced bytes, and an .include
    line for data/scripts/<label>.inc. Every other line is echoed unchanged.
    """
    # read baserom data
    with open(f'{TMC_FOLDER}/baserom.gba', 'rb') as baserom:
        baserom_data = bytearray(baserom.read())

    # read scripts.s with incbins
    with open(f'{TMC_FOLDER}/data/scripts.s', 'r') as scripts:
        # One line of lookahead: a line that follows a label but is not an
        # .incbin stays pending and is examined on the next iteration, so a
        # label directly following another label is still recognised.
        upcoming = scripts.readline()
        while upcoming:
            line = upcoming
            upcoming = scripts.readline()

            if ':: @' not in line or '.incbin' not in upcoming:
                print(line, end='')
                continue

            # Drop a trailing '@' comment before splitting the operands.
            fields = upcoming.partition('@')[0].split(',')
            if len(fields) != 3:
                # Not of the form `.incbin "file", offset, count`; leave the
                # label and the macro untouched (the macro line is echoed by
                # the next iteration).
                print(line, end='')
                continue

            label = line.split('::')[0]
            start = int(fields[1], 16)
            # The third operand is a byte count, not an end offset.
            length = int(fields[2], 16)

            # read data from rom
            data = baserom_data[start:start + length]
            print(f'DISASM {label}')
            disassemble_script(data)
            # print new include label
            print(f'.include "data/scripts/{label}.inc"')

            # The .incbin line has been consumed; move the lookahead past it.
            upcoming = scripts.readline()
# Only run the resolver when executed as a script, not when imported.
if __name__ == '__main__':
    main()

View File

@ -26,26 +26,6 @@ def barray_to_u32_hex(barray):
integers = struct.unpack('I'*count, barray) integers = struct.unpack('I'*count, barray)
return [u32_to_hex(x) for x in integers] return [u32_to_hex(x) for x in integers]
"""
def GetNextScriptCommandWordAfterCommandMetadata(ctx):
return struct.unpack('I', ctx.data[ctx.ptr+2:ctx.ptr+6])[0]
def sub_0807E10C(ctx):
meta = GetNextScriptCommandWordAfterCommandMetadata(ctx)
print('function', hex(meta))
def sub_0807E5F8(ctx):
curPtr = start + ctx.ptr
print('current', hex(curPtr))
# gUnk_02033280.unk_00 |= GetNextScriptCommandWordAfterCommandMetadata(unk2->unk_00.raw);
meta = GetNextScriptCommandWordAfterCommandMetadata(ctx)
print('meta', meta)
newPtr = curPtr | meta
print('Start next script at: ', hex(newPtr))
"""
commands = [ commands = [
{'fun': 'nullsub_507', 'params': ''}, {'fun': 'nullsub_507', 'params': ''},
{'fun': 'sub_0807E004', 'params': '', 'name': 'start executing scripts'}, {'fun': 'sub_0807E004', 'params': '', 'name': 'start executing scripts'},
@ -65,127 +45,127 @@ commands = [
{'fun': 'sub_0807E188', 'params': 's'}, {'fun': 'sub_0807E188', 'params': 's'},
{'fun': 'sub_0807E1D8', 'params': 's'}, {'fun': 'sub_0807E1D8', 'params': 's'},
{'fun': 'sub_0807E4AC', 'params': 's'}, {'fun': 'sub_0807E4AC', 'params': 's'},
{'fun': 'sub_0807E200'}, {'fun': 'sub_0807E200', 'params': 'w'},
{'fun': 'sub_0807E220'}, {'fun': 'sub_0807E220', 'params': 'ss'},
{'fun': 'sub_0807E240'}, {'fun': 'sub_0807E240', 'params': 's'},
{'fun': 'sub_0807E260', 'params': 's'}, {'fun': 'sub_0807E260', 'params': 's'},
{'fun': 'sub_0807E280'}, {'fun': 'sub_0807E280', 'params': 'sss'},
{'fun': 'sub_0807E2A8', 'params': 's'}, {'fun': 'sub_0807E2A8', 'params': 's'},
{'fun': 'sub_0807E2E4', 'params': ''}, {'fun': 'sub_0807E2E4', 'params': ''},
{'fun': 'sub_0807E30C', 'params': ''}, {'fun': 'sub_0807E30C', 'params': ''},
{'fun': 'sub_0807E390'}, {'fun': 'sub_0807E390', 'params': 's'},
{'fun': 'sub_0807E3BC'}, {'fun': 'sub_0807E3BC', 'params': 's'},
{'fun': 'sub_0807E3E8'}, {'fun': 'sub_0807E3E8', 'params': ''},
{'fun': 'sub_0807E40C', 'params': 's'}, {'fun': 'sub_0807E40C', 'params': 's'},
{'fun': 'sub_0807E42C'}, {'fun': 'sub_0807E42C', 'params': 'ss'},
{'fun': 'sub_0807E48C'}, {'fun': 'sub_0807E48C', 'params': 's'},
{'fun': 'sub_0807E4CC'}, {'fun': 'sub_0807E4CC', 'params': 'w'},
{'fun': 'sub_0807E4EC'}, {'fun': 'sub_0807E4EC', 'params': 'w'},
{'fun': 'sub_0807E514'}, {'fun': 'sub_0807E514', 'params': 's'},
{'fun': 'sub_0807E538'}, {'fun': 'sub_0807E538', 'params':'w'},
{'fun': 'sub_0807E564'}, {'fun': 'sub_0807E564', 'params': ''},
{'fun': 'sub_0807E584'}, {'fun': 'sub_0807E584', 'params': ''},
{'fun': 'sub_0807E5A0'}, {'fun': 'sub_0807E5A0', 'params': 's'},
{'fun': 'sub_0807E5CC'}, {'fun': 'sub_0807E5CC', 'params': 's'},
{'fun': 'sub_0807E5F8', 'params': 'w'},# 'exec': sub_0807E5F8}, {'fun': 'sub_0807E5F8', 'params': 'w'},# 'exec': sub_0807E5F8},
{'fun': 'sub_0807E610'}, {'fun': 'sub_0807E610', 'params': 'w'},
{'fun': 'sub_0807E628'}, {'fun': 'sub_0807E628', 'params': 's'},
{'fun': 'sub_0807E634'}, {'fun': 'sub_0807E634', 'params': 'ss'},
{'fun': 'sub_0807E644'}, {'fun': 'sub_0807E644', 'params': 's'},
{'fun': 'sub_0807E650', 'params': 's'}, {'fun': 'sub_0807E650', 'params': 's'},
{'fun': 'sub_0807E65C'}, {'fun': 'sub_0807E65C', 'params': 's'},
{'fun': 'sub_0807E668'}, {'fun': 'sub_0807E668', 'params': 's'},
{'fun': 'sub_0807E674'}, {'fun': 'sub_0807E674', 'params': 's'},
{'fun': 'sub_0807E680', 'params': 's'}, {'fun': 'sub_0807E680', 'params': 's'},
{'fun': 'sub_0807E690'}, {'fun': 'sub_0807E690', 'params': 'w'},
{'fun': 'sub_0807E6AC', 'params': 'w'}, {'fun': 'sub_0807E6AC', 'params': 'w'},
{'fun': 'sub_0807E6DC', 'params': ''}, {'fun': 'sub_0807E6DC', 'params': ''},
{'fun': 'sub_0807E700'}, {'fun': 'sub_0807E700', 'params': ''},
{'fun': 'sub_0807E72C'}, {'fun': 'sub_0807E72C', 'params': ''},
{'fun': 'sub_0807E75C'}, {'fun': 'sub_0807E75C', 'params': ''},
{'fun': 'sub_0807E778'}, {'fun': 'sub_0807E778', 'params': ''},
{'fun': 'sub_0807E788'}, {'fun': 'sub_0807E788', 'params': 'w'},
{'fun': 'sub_0807E79C'}, {'fun': 'sub_0807E79C', 'params': ''},
{'fun': 'nullsub_508'}, {'fun': 'nullsub_508', 'params': 's'},
{'fun': 'sub_0807E7B0'}, {'fun': 'sub_0807E7B0', 'params': ''},
{'fun': 'sub_0807E7C4'}, {'fun': 'sub_0807E7C4', 'params': ''},
{'fun': 'sub_0807E7D8'}, {'fun': 'sub_0807E7D8', 'params': ''},
{'fun': 'sub_0807E7EC'}, {'fun': 'sub_0807E7EC', 'params': ''},
{'fun': 'sub_0807E800'}, {'fun': 'sub_0807E800', 'params': 'w'},
{'fun': 'sub_0807E80C'}, {'fun': 'sub_0807E80C', 'params': 'w'},
{'fun': 'sub_0807E858'}, {'fun': 'sub_0807E858', 'params': 's'},
{'fun': 'sub_0807E864', 'params': ''}, {'fun': 'sub_0807E864', 'params': ''},
{'fun': 'sub_0807E878', 'params': ''}, {'fun': 'sub_0807E878', 'params': ''},
{'fun': 'sub_0807E888'}, {'fun': 'sub_0807E888', 'params': ''},
{'fun': 'sub_0807E898'}, {'fun': 'sub_0807E898', 'params': 'w'},
{'fun': 'sub_0807E8C4', 'params': 'w'}, {'fun': 'sub_0807E8C4', 'params': 'w'},
{'fun': 'sub_0807E8D4'}, {'fun': 'sub_0807E8D4', 'params': 's'},
{'fun': 'sub_0807E8E4_0', 'params': ''}, # duplicate {'fun': 'sub_0807E8E4_0', 'params': ''}, # duplicate
{'fun': 'sub_0807E8E4_1', 'params': ''}, # duplicate {'fun': 'sub_0807E8E4_1', 'params': ''}, # duplicate
{'fun': 'sub_0807E8E4_2', 'params': ''}, # duplicate {'fun': 'sub_0807E8E4_2', 'params': ''}, # duplicate
{'fun': 'sub_0807E8E4_3', 'params': ''}, # duplicate {'fun': 'sub_0807E8E4_3', 'params': ''}, # duplicate
{'fun': 'sub_0807E908'}, {'fun': 'sub_0807E908', 'params': 's'},
{'fun': 'sub_0807E914'}, {'fun': 'sub_0807E914', 'params': 'w'},
{'fun': 'sub_0807E924'}, {'fun': 'sub_0807E924', 'params': ''},
{'fun': 'sub_0807E930', 'params': 's'}, {'fun': 'sub_0807E930', 'params': 's'},
{'fun': 'sub_0807E944', 'params': ''}, {'fun': 'sub_0807E944', 'params': ''},
{'fun': 'sub_0807E974'}, {'fun': 'sub_0807E974', 'params': 's'},
{'fun': 'sub_0807E9D4', 'params': ''}, {'fun': 'sub_0807E9D4', 'params': ''},
{'fun': 'sub_0807E9DC', 'params': ''}, {'fun': 'sub_0807E9DC', 'params': ''},
{'fun': 'sub_0807E9E4'}, {'fun': 'sub_0807E9E4', 'params': 's'},
{'fun': 'sub_0807E9F0', 'params': ''}, {'fun': 'sub_0807E9F0', 'params': ''},
{'fun': 'sub_0807EA4C', 'params': ''}, {'fun': 'sub_0807EA4C', 'params': ''},
{'fun': 'sub_0807EA88'}, {'fun': 'sub_0807EA88', 'params': 's'},
{'fun': 'sub_0807EA94', 'params': ''}, {'fun': 'sub_0807EA94', 'params': ''},
{'fun': 'sub_0807EAB4', 'params': 's'}, {'fun': 'sub_0807EAB4', 'params': 's'},
{'fun': 'sub_0807EAC0'}, {'fun': 'sub_0807EAC0', 'params': 's'},
{'fun': 'sub_0807EAD0', 'params': 'ss'}, {'fun': 'sub_0807EAD0', 'params': 'ss'},
{'fun': 'sub_0807EAF0'}, {'fun': 'sub_0807EAF0', 'params': 'w'},
{'fun': 'sub_0807EB18'}, {'fun': 'sub_0807EB18', 'params': ''},
{'fun': 'sub_0807EB28', 'params': 's'}, {'fun': 'sub_0807EB28', 'params': 's'},
{'fun': 'sub_0807EB38'}, {'fun': 'sub_0807EB38', 'params': ''},
{'fun': 'sub_0807EB44', 'params': 's'}, {'fun': 'sub_0807EB44', 'params': 's'},
{'fun': 'sub_0807EB4C'}, {'fun': 'sub_0807EB4C', 'params': 'ss'},
{'fun': 'sub_0807EB74', 'params': ''}, {'fun': 'sub_0807EB74', 'params': ''},
{'fun': 'sub_0807EB8C', 'params': ''}, {'fun': 'sub_0807EB8C', 'params': ''},
{'fun': 'sub_0807EBA8'}, {'fun': 'sub_0807EBA8', 'params': 's'},
{'fun': 'sub_0807EBB0'}, {'fun': 'sub_0807EBB0', 'params': 's'},
{'fun': 'sub_0807EBC0', 'params': 's'}, {'fun': 'sub_0807EBC0', 'params': 's'},
{'fun': 'sub_0807EBC8'}, {'fun': 'sub_0807EBC8', 'params': 'w'},
{'fun': 'sub_0807EBD8'}, {'fun': 'sub_0807EBD8', 'params': 'w'},
{'fun': 'sub_0807EBF4'}, {'fun': 'sub_0807EBF4', 'params': 'ss'},
{'fun': 'sub_0807EC08'}, {'fun': 'sub_0807EC08', 'params': ''},
{'fun': 'nullsub_509'}, {'fun': 'nullsub_509', 'params': ''},
{'fun': 'sub_0807EC1C', 'params': 's'}, {'fun': 'sub_0807EC1C', 'params': 's'},
{'fun': 'sub_0807EC64'}, {'fun': 'sub_0807EC64', 'params': 's'},
{'fun': 'sub_0807EC94'}, {'fun': 'sub_0807EC94', 'params': 's'},
{'fun': 'sub_0807ECC4'}, {'fun': 'sub_0807ECC4', 'params': 's'},
{'fun': 'sub_0807ECF4', 'params': 's'}, {'fun': 'sub_0807ECF4', 'params': 's'},
{'fun': 'sub_0807ED24'}, {'fun': 'sub_0807ED24', 'params': 'sss'},
{'fun': 'sub_0807EDD4', 'params': 'ss'}, {'fun': 'sub_0807EDD4', 'params': 'ss'},
{'fun': 'sub_0807EE04'}, {'fun': 'sub_0807EE04', 'params': 'ss'},
{'fun': 'sub_0807EE30', 'params': ''}, {'fun': 'sub_0807EE30', 'params': ''},
{'fun': 'sub_0807EEB4'}, {'fun': 'sub_0807EEB4', 'params': ''},
{'fun': 'sub_0807EEF4'}, {'fun': 'sub_0807EEF4', 'params': ''},
{'fun': 'sub_0807EF3C'}, {'fun': 'sub_0807EF3C', 'params': 'ss'},
{'fun': 'sub_0807EF80', 'params': 's'}, {'fun': 'sub_0807EF80', 'params': 's'},
{'fun': 'sub_0807EF90'}, {'fun': 'sub_0807EF90', 'params': 's'},
{'fun': 'sub_0807EFA0'}, {'fun': 'sub_0807EFA0', 'params': 's'},
{'fun': 'sub_0807EFAC', 'params': 's'}, {'fun': 'sub_0807EFAC', 'params': 's'},
{'fun': 'sub_0807EFD4'}, {'fun': 'sub_0807EFD4', 'params': 'w'},
{'fun': 'sub_0807EFE4', 'params': ''}, {'fun': 'sub_0807EFE4', 'params': ''},
{'fun': 'sub_0807EFF4'}, {'fun': 'sub_0807EFF4', 'params': 's'},
{'fun': 'sub_0807F004'}, {'fun': 'sub_0807F004', 'params': 's'},
{'fun': 'sub_0807F010'}, {'fun': 'sub_0807F010', 'params': 's'},
{'fun': 'sub_0807F034', 'params': 's'}, {'fun': 'sub_0807F034', 'params': 's'},
{'fun': 'sub_0807F050'}, {'fun': 'sub_0807F050', 'params': 's'},
{'fun': 'sub_0807F060'}, {'fun': 'sub_0807F060', 'params': 's'},
{'fun': 'sub_0807F078'}, {'fun': 'sub_0807F078', 'params': 'ss'},
{'fun': 'sub_0807F088'}, {'fun': 'sub_0807F088', 'params': 's'},
{'fun': 'sub_0807F098', 'params': ''}, {'fun': 'sub_0807F098', 'params': ''},
{'fun': 'sub_0807F0A4'}, {'fun': 'sub_0807F0A4', 'params': ''},
{'fun': 'sub_0807F0B4', 'params': 's'}, {'fun': 'sub_0807F0B4', 'params': 's'},
{'fun': 'sub_0807F0C8'} {'fun': 'sub_0807F0C8', 'params': 'ss'}
] ]
# definitions for parameters # definitions for parameters
@ -220,6 +200,12 @@ parameters = {
'expr': ' .word \\a\n .word \\b', 'expr': ' .word \\a\n .word \\b',
'read': lambda ctx: ', '.join(barray_to_u32_hex(ctx.data[ctx.ptr+2:ctx.ptr+10])) 'read': lambda ctx: ', '.join(barray_to_u32_hex(ctx.data[ctx.ptr+2:ctx.ptr+10]))
}, },
'sss': {
'length': 3,
'param': 'a,b,c',
'expr': ' .short \\a\n .short \\b\n .short \\c',
'read': lambda ctx: ', '.join(barray_to_u16_hex(ctx.data[ctx.ptr+2:ctx.ptr+8]))
},
} }
@ -229,14 +215,20 @@ def ExecuteScriptCommandSet(ctx: Context):
return 0 return 0
unk_06 = cmd >> 0xA unk_06 = cmd >> 0xA
if unk_06 == 0:
# TODO error
return 0
operationId = cmd & 0x3FF operationId = cmd & 0x3FF
#print(cmd) if operationId >= len(commands):
#print('advance by', unk_06) print('ERR: NO CMD')
#print('command id', operationId) # TODO error
return 0
command = commands[operationId] command = commands[operationId]
#print(command)
param_length = unk_06 - 1 param_length = unk_06 - 1
if unk_06 > 1: if unk_06 > 1:
if ctx.ptr+2*unk_06 >= len(ctx.data):
# TODO raise Exception(f'Not enough data to fetch {unk_06-1} params')
return 0
meta = struct.unpack( meta = struct.unpack(
'H'*(unk_06-1), ctx.data[ctx.ptr+2:ctx.ptr+2*unk_06]) 'H'*(unk_06-1), ctx.data[ctx.ptr+2:ctx.ptr+2*unk_06])
#print('meta', meta) #print('meta', meta)
@ -248,7 +240,11 @@ def ExecuteScriptCommandSet(ctx: Context):
raise Exception('Parameter configuration ' + command['params'] + ' not defined') raise Exception('Parameter configuration ' + command['params'] + ' not defined')
params = parameters[command['params']] params = parameters[command['params']]
if unk_06-1 != params['length']: if unk_06-1 != params['length']:
raise Exception(f'Call {command["fun"]} with ' + str(unk_06-1) +' length, while length of ' + str(params['length'])+' defined') # TODOraise Exception(f'Call {command["fun"]} with ' + str(unk_06-1) +' length, while length of ' + str(params['length'])+' defined')
# TEMPORARY WORKAROUND:
print(ctx.ptr, unk_06, cmd)
ctx.ptr += unk_06*2
return 1
print(command['fun'] + ' ' + params['read'](ctx)) print(command['fun'] + ' ' + params['read'](ctx))
@ -258,18 +254,23 @@ def ExecuteScriptCommandSet(ctx: Context):
return 1 return 1
def disassemble_script(input_bytes):
    """Disassemble one script blob and print the result to stdout.

    input_bytes: the raw script bytes (the visible callers pass a bytearray);
    they are wrapped in a Context starting at offset 0.

    Commands are decoded with ExecuteScriptCommandSet() until it returns a
    falsy value or fewer than two bytes remain. Whatever could not be decoded
    is then dumped as raw data so that no input byte is lost.
    """
    ctx = Context(0, input_bytes)

    # There need to be at least two bytes remaining for the next operation id.
    while ctx.ptr < len(ctx.data) - 1:
        if not ExecuteScriptCommandSet(ctx):
            break

    # Print rest (did not manage to get there)
    rest = ctx.data[ctx.ptr:]
    if not rest:
        return

    # Emit complete 16-bit words as .short and a dangling odd byte as .byte,
    # instead of printing the Python repr of the leftover bytes.
    even_len = len(rest) - len(rest) % 2
    lines = []
    if even_len:
        lines.extend('.short ' + x for x in barray_to_u16_hex(rest[:even_len]))
    if even_len != len(rest):
        lines.append('.byte ' + hex(rest[-1]))
    print('\n'.join(lines))
@ -316,6 +317,7 @@ def main():
if input_data.strip() == 'macros': if input_data.strip() == 'macros':
generate_macros() generate_macros()
return return
disassemble_script(input_data) disassemble_script(bytearray.fromhex(input_data))
main() if __name__ == '__main__':
main()