tp/tools/utilities/beautify_anm_data.py

539 lines
18 KiB
Python

#
# Version v2.0
#
# Author: YunataSavior
# Brief: Converts byte-array anm data into their proper forms.
#
# Used for decompiling d_a_npc_ETC TUs.
#
import fire
import os
import re
import sys
import struct
# Each *_TYPE string below doubles as (a) the tag that selects a layout in
# build_anm_struct and (b) the declaration text emitted after "static " in the
# generated output. Each *_PATTERN is the regex for the opening line of the
# corresponding raw declaration in the input file.
FACE_MOTION_TYPE = "daNpcT_faceMotionAnmData_c l_faceMotionAnmData"
FACE_MOTION_PATTERN = r'SECTION_DATA static u8 l_faceMotionAnmData\[\d+\] = {'
MOTION_TYPE = "daNpcT_motionAnmData_c l_motionAnmData"
MOTION_PATTERN = r'SECTION_DATA static u8 l_motionAnmData\[\d+\] = {'
SEQ_FACE_MOTION_TYPE = "daNpcT_MotionSeqMngr_c::sequenceStepData_c l_faceMotionSequenceData"
SEQ_FACE_MOTION_PATTERN = r'SECTION_DATA static u8 l_faceMotionSequenceData\[\d+\] = {'
SEQ_MOTION_TYPE = "daNpcT_MotionSeqMngr_c::sequenceStepData_c l_motionSequenceData"
SEQ_MOTION_PATTERN = r'SECTION_DATA static u8 l_motionSequenceData\[\d+\] = {'
HEAP_SIZE_TYPE = "int const heapSize"
HEAP_SIZE_PATTERN = r'SECTION_RODATA static u8 const heapSize\[\d+\] = {'
BMD_DATA_TYPE = "int l_bmdData"
BMD_DATA_PATTERN = r'SECTION_DATA static u8 l_bmdData\[\d+\] = {'
# PARAM_TYPE is only used as a tag; group 1 of PARAM_PATTERN captures the
# "<name>_Param_c" class name that owns the ::m array.
PARAM_TYPE = "::m"
PARAM_PATTERN = r'SECTION_RODATA u8 const (\w+_Param_c)::m\[\d+\] = {'
# Opening lines of the void* arrays that are rewritten as string tables.
EVT_LIST_PATTERN = r'SECTION_DATA static void\* l_evtList\[\d+\] = {'
RES_NAME_PATTERN = r'SECTION_DATA static void\* l_resNameList\[\d+\] = {'
# Group 1: owning class name, group 2: declared array size.
CUT_NAME_PATTERN = r'SECTION_DATA void\* ([a-zA-Z_][a-zA-Z0-9_]*)::mCutNameList\[(\d+)\] = \{'
# A void* entry holding a plain integer: either NULL or a hex literal (group 1).
VOID_PTR_INT_PATTERN = r'\(void\*\)(NULL|0x[0-9A-Fa-f]+)'
# A string-literal definition line. Group 1: hex address suffix of the symbol,
# group 2: the quoted literal (quotes included).
STRING_BASE_PATTERN = r'^SECTION_DEAD static char const\* const stringBase_([0-9A-Fa-f]+)\s*=\s*(".*?");'
# A void* entry pointing at the start of the string base (offset 0).
STR_NO_OFFSET_PATTERN = r'\(void\*\)&([a-zA-Z0-9_]+)__stringBase0'
# A void* entry pointing into the string base; group 2 is the hex byte offset.
STR_WITH_OFFSET_PATTERN = r'\(void\*\)\(\(\(char\*\)&([a-zA-Z0-9_]+)__stringBase0\) \+ 0x([0-9A-Fa-f]+)\)'
def unsigned_val(hexstr):
    """Return the unsigned integer encoded by the hexadecimal string *hexstr*."""
    return int(hexstr, 16)
def twos_complement(hexstr, bits):
    """Interpret *hexstr* as a two's-complement integer that is *bits* wide.

    Values whose top bit (bit ``bits - 1``) is set are returned as negatives.
    """
    # https://stackoverflow.com/questions/6727875/hex-string-to-signed-int-in-python
    raw = int(hexstr, 16)
    sign_mask = 1 << (bits - 1)
    return raw - (1 << bits) if raw & sign_mask else raw
def prm_is_float(hex_str):
    """Guess whether a 32-bit hex word looks like a float.

    Bits 30-23 of the word are read as a biased exponent; the word is
    reported as a float when the unbiased exponent lies within
    ``[-EXP_TOLERANCE, EXP_TOLERANCE]``.
    """
    EXP_TOLERANCE = 10
    word = int(hex_str, 16)
    biased_exponent = (word >> 23) & 0xFF  # bits 30-23
    unbiased_exponent = biased_exponent - 127  # remove the bias
    return -EXP_TOLERANCE <= unbiased_exponent <= EXP_TOLERANCE
# Expects NO leading "0x":
def hex_to_float(hex_str):
    """Decode a hex string as a network-order (big-endian) 32-bit float."""
    raw = bytes.fromhex(hex_str)
    (decoded,) = struct.unpack('!f', raw)
    return decoded
def float_to_hex(f):
    """Encode *f* as a big-endian 32-bit float; return 8 uppercase hex digits."""
    packed = struct.pack(">f", f)
    as_int = int.from_bytes(packed, "big")
    return "{:08X}".format(as_int)
def handle_npc_param(byte_collection, param_name, type, no_auto_float):
    """Print a typed HIO-param struct and initializer for a raw ``::m`` byte array.

    Args:
        byte_collection: list of byte tokens, each formatted as ``0xNN``.
            Its length must be a multiple of 4.
        param_name: name of the owning ``*_Param_c`` class; the struct name is
            derived by replacing a trailing ``_Param_c`` with ``_HIOParam``.
        type: layout selector. ``"raw"`` treats every 4 bytes as an untyped
            word; ``"daNpcT"`` decodes the leading bytes with the field layout
            listed below and treats the remainder as untyped words. ``None``,
            ``"daNpcF"`` and any other value print an error and exit.
        no_auto_float: when true, untyped words are never guessed to be
            floats; otherwise prm_is_float decides per word.

    Output (via print): a struct definition, the ``static const`` member
    declaration, and the initializer for ``<param_name>::m``.

    Exits with status 1 on malformed input or an unsupported type, and raises
    AssertionError when a float does not survive rounding to 6 decimals or
    when the data ends before the typed layout is fully consumed.
    """
    my_len = len(byte_collection)
    if my_len % 4 != 0:
        print(f"Error: len() = '{my_len}' isn't divisble by 4")
        sys.exit(1)
    # Special handling.
    # Each instr_arr entry is "<kind><size>": kind is u (unsigned), s (signed),
    # f (float) or h (raw hex); size is the field width in bytes.
    instr_arr = []
    common_name = ""
    is_raw = False
    if type is None:
        print("ERROR: --type <type> must be specified")
        sys.exit(1)
    elif type == "raw":
        print("// Make sure the type isn't actually daNpcT nor daNpcF.")
        is_raw = True
    elif type == "daNpcT":
        common_name = "daNpcT_HIOParam"
        instr_arr = [
            "f4", # attention_offset;
            "f4", # gravity;
            "f4", # scale;
            "f4", # real_shadow_size;
            "f4", # weight;
            "f4", # height;
            "f4", # knee_length;
            "f4", # width;
            "f4", # body_angleX_max;
            "f4", # body_angleX_min;
            "f4", # body_angleY_max;
            "f4", # body_angleY_min;
            "f4", # head_angleX_max;
            "f4", # head_angleX_min;
            "f4", # head_angleY_max;
            "f4", # head_angleY_min;
            "f4", # neck_rotation_ratio;
            "f4", # morf_frame;
            "s2", # talk_distance;
            "s2", # talk_angle;
            "s2", # attention_distance;
            "s2", # attention_angle;
            "f4", # fov;
            "f4", # search_distance;
            "f4", # search_height;
            "f4", # search_depth;
            "s2", # attention_time;
            "s2", # damage_time;
            "s2", # face_expression;
            "s2", # motion;
            "s2", # look_mode;
            "u1", # debug_mode_ON;
            "u1", # debug_info_ON;
            "f4", # expression_morf_frame;
            "f4", # box_min_x;
            "f4", # box_min_y;
            "f4", # box_min_z;
            "f4", # box_max_x;
            "f4", # box_max_y;
            "f4", # box_max_z;
            "f4", # box_offset;
        ]
    elif type == "daNpcF":
        # TODO
        print("ERROR: --type daNpcF is currently unsupported.")
        print(" You can try to implement it yourself.")
        print(" If you need help with this, please seek the help of yunatasavior on Discord.")
        sys.exit(1)
    else:
        print(f"ERROR: --type {type} is unsupported. Please review the script to see which are supported.")
        sys.exit(1)
    ptr = 0
    cur_instr = 0
    hexstr = ""
    res_array = []
    # Byte size of the typed (common) prefix; stays 0 until the whole layout
    # in instr_arr has been consumed, and always stays 0 in raw mode.
    common_size = 0
    # One flag per untyped 4-byte word, in order of appearance.
    prms_is_float: list[bool] = []
    while ptr < my_len:
        curbyte = byte_collection[ptr]
        ptr += 1
        if curbyte[:2] != "0x" or len(curbyte) != 4:
            print(f"Error: '{curbyte}' isn't formatted as a byte")
            sys.exit(1)
        # Accumulate the two hex digits of this byte into the current field.
        hexstr += curbyte[-2:]
        # Past the end of the layout (or in raw mode) every field is a
        # 4-byte raw hex word.
        my_type = "h"
        exp_bytes = 4
        if cur_instr < len(instr_arr):
            my_type = instr_arr[cur_instr][0]
            exp_bytes = int(instr_arr[cur_instr][1])
        if len(hexstr) / 2 == exp_bytes:
            # The field is complete: decode it according to its kind.
            if my_type == 'u':
                val = unsigned_val(hexstr)
                res_array.append(val)
            elif my_type == 's':
                val = twos_complement(hexstr, exp_bytes*8)
                res_array.append(val)
            elif my_type == 'h':
                if is_raw or common_size != 0:
                    if no_auto_float:
                        prms_is_float.append(False)
                    else:
                        prms_is_float.append(prm_is_float(hexstr))
                res_array.append("0x" + hexstr)
            elif my_type == 'f':
                fvalue = hex_to_float(hexstr)
                fvalue = round(fvalue, 6)
                # Re-encode the rounded value to verify rounding lost nothing.
                chk_val = float_to_hex(fvalue)
                assert chk_val == hexstr, f"fvalue({fvalue}): chk_val {chk_val} != hexstr {hexstr}"
                res_array.append(fvalue)
            else:
                print(f"Error: unknown type '{my_type}'")
                sys.exit(1)
            hexstr = ""
            if not is_raw and common_size == 0:
                cur_instr += 1
            if not is_raw and cur_instr == len(instr_arr):
                if common_size == 0:
                    # ptr already points past the last typed byte, so it is
                    # the size of the typed prefix.
                    common_size = ptr
    assert is_raw or common_size != 0, "Param array is too short for specified type"
    hio_name = re.sub(r'_Param_c$', '_HIOParam', param_name)
    res_str = "// Must be put OUTSIDE {}:\n".format(param_name)
    res_str += "struct {} {{\n".format(hio_name)
    idx = common_size
    if not is_raw:
        res_str += " /* 0x00 */ {} common;\n".format(common_name)
    # Emit one f32/u32 member per untyped word, named after its byte offset.
    while idx < my_len:
        upper = f'{idx:02X}'
        lower = f'{idx:02x}'
        mych = 'f' if (prms_is_float[int((idx-common_size) / 4)] is True) else 'u'
        res_str += " /* 0x{} */ {}32 field_0x{};\n".format(upper, mych, lower)
        idx += 4
    res_str += "};\n\n"
    res_str += "// Must be put INSIDE {}:\n".format(param_name)
    res_str += "static const {} m;\n\n".format(hio_name)
    res_str += "const {} {}::m".format(hio_name, param_name)
    res_str += " = {\n"
    cur_pos = 0
    for value in res_array:
        res_str += " "
        if isinstance(value, int):
            res_str += str(value)
        elif isinstance(value, float):
            res_str += str(value) + "f"
        else:
            # Untyped word: its index into prms_is_float is its position in
            # res_array minus the number of typed fields.
            adj_pos = cur_pos - len(instr_arr)
            if adj_pos >= 0 and prms_is_float[adj_pos] is True:
                fvalue = hex_to_float(value[2:])
                fvalue = round(fvalue, 6)
                res_str += f"{fvalue}f"
                chk_val = "0x" + float_to_hex(fvalue)
                # Sanity check in case rounding is too aggressive:
                assert chk_val == value, f"chk_val {chk_val} != value {value}"
                # if prmfloat_dbg is True:
                # res_str += f" // {value}"
            else:
                res_str += value
        res_str += ",\n"
        cur_pos += 1
    res_str += "};\n"
    print(res_str)
def build_anm_struct(byte_collection, anm_type):
    """Print a typed C++ array definition decoded from a raw byte array.

    Args:
        byte_collection: list of byte tokens, each formatted as ``0xNN``.
        anm_type: one of the module-level ``*_TYPE`` strings. It selects the
            per-element field layout and is also emitted verbatim after
            ``static`` in the generated declaration.

    Each layout entry is "<kind><size>": kind ``s`` decodes a signed
    two's-complement integer, kind ``h`` keeps the raw hex word; size is the
    field width in bytes.

    Exits with status 1 when *anm_type* is not recognized, when the byte count
    is not a multiple of the element size, or when a token is not ``0xNN``.
    """
    my_len = len(byte_collection)
    is_array = False
    # Compare by value, not identity: an equal string that is not the very
    # same object as the module constant must select the same layout.
    if anm_type == PARAM_TYPE:
        # Special handling.
        if my_len % 4 != 0:
            print(f"Error: len() = '{my_len}' isn't divisble by 4")
            sys.exit(1)
        piece_size = 4
        instr_arr = ["h4"]
    else:
        if anm_type == FACE_MOTION_TYPE:
            piece_size = 28
            instr_arr = ["s4", "s4", "s4", "s4", "s4", "s4", "s4"]
            is_array = True
        elif anm_type == MOTION_TYPE:
            piece_size = 28
            instr_arr = ["s4", "s4", "s4", "s4", "s4", "s4", "s2", "s2"]
            is_array = True
        elif anm_type in (SEQ_FACE_MOTION_TYPE, SEQ_MOTION_TYPE):
            piece_size = 4
            instr_arr = ["s2", "s1", "s1"]
            is_array = True
        elif anm_type == HEAP_SIZE_TYPE:
            piece_size = 4
            instr_arr = ["h4"]
        elif anm_type == BMD_DATA_TYPE:
            piece_size = 8
            instr_arr = ["s4", "s4"]
            is_array = True
        else:
            # Without a layout the decode loop below cannot run; fail with a
            # clear message instead of an IndexError on the empty layout.
            print(f"Error: unknown anm_type '{anm_type}'")
            sys.exit(1)
        if my_len % piece_size != 0:
            print(f"Error: len() = '{my_len}' isn't divisble by '{piece_size}'")
            sys.exit(1)

    cur_instr = 0
    hexstr = ""
    full_res_arr = []  # one list of decoded fields per element
    pos_arr = []       # fields of the element currently being decoded
    for curbyte in byte_collection:
        if curbyte[:2] != "0x" or len(curbyte) != 4:
            print(f"Error: '{curbyte}' isn't formatted as a byte")
            sys.exit(1)
        hexstr += curbyte[-2:]
        exp_bytes = int(instr_arr[cur_instr][1])
        # Two hex digits per byte: keep accumulating until the field is full.
        if len(hexstr) != 2 * exp_bytes:
            continue
        my_type = instr_arr[cur_instr][0]
        if my_type == 's':
            pos_arr.append(twos_complement(hexstr, exp_bytes * 8))
        elif my_type == 'h':
            if anm_type == HEAP_SIZE_TYPE:
                # Heap sizes are printed without leading zeros ("0" if all zero).
                hexstr = hexstr.lstrip('0') or '0'
            pos_arr.append("0x" + hexstr)
        else:
            print(f"Error: unknown type '{my_type}'")
            sys.exit(1)
        hexstr = ""
        cur_instr += 1
        if cur_instr == len(instr_arr):
            # Element complete: store it and start a fresh one.
            cur_instr = 0
            full_res_arr.append(pos_arr)
            pos_arr = []

    # Number of elements printed per output line.
    cutoff_num = 1
    if anm_type in (SEQ_FACE_MOTION_TYPE, SEQ_MOTION_TYPE):
        cutoff_num = 8
    elif anm_type == HEAP_SIZE_TYPE:
        cutoff_num = 4

    res_str = "static {}[{}]".format(anm_type, my_len // piece_size)
    if anm_type == BMD_DATA_TYPE:
        res_str += "[2]"
    res_str += " = {\n"
    cur_in_line = 0
    for my_arr in full_res_arr:
        if cur_in_line == 0:
            res_str += " "
        # Decoded fields are only ints or "0x..." strings, so str() covers both.
        joined = ", ".join(str(value) for value in my_arr)
        if is_array:
            joined = "{" + joined + "}"
        res_str += joined + ","
        cur_in_line += 1
        if cur_in_line == cutoff_num:
            cur_in_line = 0
            res_str += "\n"
        else:
            res_str += " "
    if cur_in_line != 0:
        res_str += "\n"
    res_str += "};\n"
    print(res_str)
def handle_charptr_array(int_collection, charptr_collection, charptr_type, class_name):
    """Print a C++ string-table definition for a decoded ``void*`` array.

    Args:
        int_collection: integer paired with each string; only read for
            ``l_evtList`` and must then be as long as *charptr_collection*.
        charptr_collection: string literals (already quoted) in array order.
        charptr_type: ``"l_evtList"``, ``"l_resNameList"`` or ``"mCutNameList"``.
        class_name: owning class, used only for ``mCutNameList``.

    Raises:
        Exception: when *charptr_type* is not one of the three known kinds.
    """
    count = len(charptr_collection)
    is_evt_list = charptr_type == "l_evtList"
    if is_evt_list:
        assert len(charptr_collection) == len(int_collection)
        header = f"static daNpcT_evtData_c l_evtList[{count}] = {{\n"
    elif charptr_type == "l_resNameList":
        header = f"static char* l_resNameList[{count}] = {{\n"
    elif charptr_type == "mCutNameList":
        header = f"char* {class_name}::mCutNameList[{count}] = {{\n"
    else:
        raise Exception(f"Unknown charptr_type \"{charptr_type}\"")

    pieces = [header]
    for pos, literal in enumerate(charptr_collection):
        if is_evt_list:
            # Event entries pair the string with its integer.
            pieces.append(f" {{{literal}, {int_collection[pos]}}},\n")
        else:
            pieces.append(f" {literal},\n")
    pieces.append("};\n")
    print("".join(pieces))
def run_beautify_anm_data(in_file, type=None, no_auto_float=False):
    """Scan *in_file* and print typed replacements for its raw data arrays.

    The file is read line by line. String-base definitions are recorded by
    their offset from the first one seen. Recognized byte arrays are collected
    until their closing ``};`` and then handed to handle_npc_param (for
    ``::m`` parameter arrays) or build_anm_struct (everything else).
    Recognized ``void*`` arrays are collected and handed to
    handle_charptr_array.

    Args:
        in_file: path of the source file to scan.
        type: layout selector forwarded to handle_npc_param.
        no_auto_float: forwarded to handle_npc_param.

    Exits with status 1 when *in_file* does not exist. Raises Exception when a
    line inside a ``void*`` array matches none of the known entry forms, and
    KeyError when an entry refers to a string offset that was not recorded.
    """
    # Check if the file exists
    if not os.path.isfile(in_file):
        print(f"Error: File '{in_file}' not found.")
        sys.exit(1)
    # Read everything up front so the handle is released even when a handler
    # below raises or calls sys.exit().
    with open(in_file, 'r') as src:
        lines = src.read().splitlines()

    in_byte_array = False
    in_charptr_array = False
    byte_collection = []
    charptr_collection = []
    int_collection = []
    anm_type = ""
    charptr_type = ""
    param_name = ""
    class_name = ""
    # Address of the first string-base symbol seen; 0 means "none seen yet".
    str_lit_start = 0
    # Maps byte offset from the first string-base symbol to its quoted literal.
    strlit_map = {}
    for line in lines:
        words = line.split()
        if not words:
            continue
        sb_match = re.search(STRING_BASE_PATTERN, line)
        if sb_match:
            addr_int = unsigned_val(sb_match.group(1))
            string_lit = sb_match.group(2)
            offset = 0
            if str_lit_start == 0:
                str_lit_start = addr_int
            else:
                offset = addr_int - str_lit_start
            strlit_map[offset] = string_lit
            continue
        if not in_byte_array and not in_charptr_array:
            # Outside any array: look for the opening line of a known one.
            if re.search(FACE_MOTION_PATTERN, line):
                in_byte_array = True
                anm_type = FACE_MOTION_TYPE
            elif re.search(MOTION_PATTERN, line):
                in_byte_array = True
                anm_type = MOTION_TYPE
            elif re.search(SEQ_FACE_MOTION_PATTERN, line):
                in_byte_array = True
                anm_type = SEQ_FACE_MOTION_TYPE
            elif re.search(SEQ_MOTION_PATTERN, line):
                in_byte_array = True
                anm_type = SEQ_MOTION_TYPE
            elif re.search(HEAP_SIZE_PATTERN, line):
                in_byte_array = True
                anm_type = HEAP_SIZE_TYPE
            elif re.search(BMD_DATA_PATTERN, line):
                in_byte_array = True
                anm_type = BMD_DATA_TYPE
            elif re.search(EVT_LIST_PATTERN, line):
                in_charptr_array = True
                charptr_type = "l_evtList"
            elif re.search(RES_NAME_PATTERN, line):
                in_charptr_array = True
                charptr_type = "l_resNameList"
            elif match := re.search(CUT_NAME_PATTERN, line):
                in_charptr_array = True
                class_name = match.group(1)
                charptr_type = "mCutNameList"
            else:
                match = re.search(PARAM_PATTERN, line)
                if match:
                    in_byte_array = True
                    anm_type = PARAM_TYPE
                    param_name = match.group(1)
        elif words[0] == '};':
            # End of the current array: emit it and reset the collectors.
            if in_byte_array:
                if anm_type == PARAM_TYPE:
                    handle_npc_param(byte_collection, param_name, type, no_auto_float)
                else:
                    build_anm_struct(byte_collection, anm_type)
                in_byte_array = False
                byte_collection.clear()
            else:
                handle_charptr_array(int_collection, charptr_collection, charptr_type, class_name)
                int_collection.clear()
                charptr_collection.clear()
                in_charptr_array = False
        elif in_byte_array:
            # Split on commas/whitespace; empty pieces come from the edges.
            tokens = re.split(r'[,\s]+', line)
            byte_collection.extend(token for token in tokens if token)
        else:
            if re.search(STR_NO_OFFSET_PATTERN, line):
                # A bare reference to the string base is offset 0.
                charptr_collection.append(strlit_map[0])
            elif match_offset := re.search(STR_WITH_OFFSET_PATTERN, line):
                offset = int(match_offset.group(2), 16)
                charptr_collection.append(strlit_map[offset])
            elif vip_match := re.search(VOID_PTR_INT_PATTERN, line):
                assert charptr_type == "l_evtList"
                value_str = vip_match.group(1)
                value = 0 if value_str == 'NULL' else int(value_str, 16)
                int_collection.append(value)
            else:
                raise Exception(f"ptr parsing: unknown line type: \"{line}\"")
    # End of for loop.
# Script entry point: hand run_beautify_anm_data to fire.Fire so that it is
# invoked with arguments taken from the command line.
if __name__ == "__main__":
    fire.Fire(run_beautify_anm_data)