diff --git a/tools/binary_funcs.py b/tools/converters/binary_funcs.py
similarity index 82%
rename from tools/binary_funcs.py
rename to tools/converters/binary_funcs.py
index c0b79eddfdd..fb5f9943e85 100644
--- a/tools/binary_funcs.py
+++ b/tools/converters/binary_funcs.py
@@ -48,15 +48,14 @@ def read_f32(binary_file: BinaryIO) -> int:
     return struct.unpack(">f", chunk)[0]
 
 def read_bytes_until_null(binary_file: BinaryIO) -> bytes:
-    str_length = 0
-    while True:
-        char = binary_file.read(1)
-        if char == b"\0":
-            break
-        else:
-            str_length += 1
-
-    binary_file.seek(-(str_length+1), os.SEEK_CUR)
-    string = binary_file.read(str_length)
-
-    return string
+    begin = binary_file.tell()
+    while True:
+        b = binary_file.read(1)
+        if len(b) == 0:
+            raise EOFError()
+        if b[0] == 0:
+            break
+
+    str_length = binary_file.tell() - begin - 1
+    binary_file.seek(begin)
+    return binary_file.read(str_length)
diff --git a/tools/converters/res_arc.py b/tools/converters/res_arc.py
new file mode 100644
index 00000000000..e4401bb053b
--- /dev/null
+++ b/tools/converters/res_arc.py
@@ -0,0 +1,314 @@
+#!/usr/bin/env python3
+
+from binary_funcs import read_bytes_until_null, read_u32, read_u16, read_u8, skip_bytes
+import subprocess
+from collections import defaultdict
+from os import walk, makedirs
+from pathlib import Path
+from typing import NamedTuple, BinaryIO
+import re
+
+# use pathlib so the tool path works on both unix and windows
+DTK_PATH = str(Path("./build/tools/dtk.exe"))
+OUT_PATH = "build/res"
+INDENT = " " * 4
+ADD_EXT_TO_ENUM = False
+SKIP_STAGE_ARCS = True
+SKIP_DEMO_ARCS = True
+SKIP_FILES_WITH_AT_SIGN = True
+# when this file was last modified; used to detect whether we should re-extract enums
+THIS_MTIME = Path(__file__).stat().st_mtime
+
+class ArcFile(NamedTuple):
+    file_name:str
+    index:int
+    id:int
+
+class ArcNode(NamedTuple):
+    node_type:str
+    name:str
+    inds:range
+
+class ArcEnumValue(NamedTuple):
+    enum_value_name:str
+    value:int
+
+class ArcEnum(NamedTuple):
+    enum_name:str
+    values:list[ArcEnumValue]
+
+class JointParsedEnums(NamedTuple):
+    enums:list[ArcEnum]
+
+def ensure_dir(path:Path)->None:
+    makedirs(path, exist_ok=True)
+
+def bin_make_str(s:bytes)->str:
+    # swap the Shift-JIS fullwidth "x" for a plain ASCII "x" so the name decodes cleanly
+    s = s.replace(b"\x82\x98", b"x")
+    try:
+        return s.decode("ascii")
+    except Exception as e:
+        raise AssertionError(f"{e}: {s}")
+
+def sanitize_string(s:str)->str:
+    # replace every character that is not valid in a C identifier with an underscore
+    return re.sub(r"[\s@:\.,\-<>*%\"!&()|\+$]", "_", s)
+
+def read_str(binf:BinaryIO)->str:
+    return bin_make_str(read_bytes_until_null(binf))
+
+def make_enum(e:ArcEnum)->str:
+    out = [f"enum {e.enum_name} {{"]
+    for v in e.values:
+        out.append(f"{INDENT}{v.enum_value_name}=0x{v.value:X},")
+    out.append("};")
+    return "\n".join(out)
+
+def parse_bmd(src_path:Path):
+    with src_path.open("rb") as binf:
+        header_magic = binf.read(4)
+        known_J3D_magic = b"J3D2"
+        assert(header_magic == known_J3D_magic), f"Attempted to parse {src_path} as bmd/bdl, but the J3D header type doesn't match {known_J3D_magic}: {header_magic}"
+        bmd_magic = binf.read(4)
+        known_bmd_magics = (b"bmd3", b"bmd2", b"bdl4")
+        assert(bmd_magic in known_bmd_magics), f"Attempted to parse {src_path} as bmd/bdl, but the bmd/bdl header type doesn't match any of {known_bmd_magics}: {bmd_magic}"
+        skip_bytes(binf, 4)
+        chunk_count = read_u32(binf)
+        binf.seek(0x20)
+        for _ in range(chunk_count):
+            chunk_begin = binf.tell()
+            name = bin_make_str(binf.read(4))
+            size = read_u32(binf)
+            next_chunk = chunk_begin + size
+            if name == "JNT1":
+                skip_bytes(binf, 12)
+                name_table = read_u32(binf) + chunk_begin
+                binf.seek(name_table)
+                num_strings = read_u16(binf)
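+                # the rest of the string table appears to be a u16 pad followed by one
+                # 4-byte entry per string (what looks like a u16 hash, then a u16 offset
+                # relative to the table start) - hence the name_table + 6 + i * 4 seek below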
+
+                found_enums = []
+                if ADD_EXT_TO_ENUM:
+                    out_enum_name = sanitize_string(src_path.name.replace(".", "_")).upper() + "_JNT"
+                else:
+                    out_enum_name = sanitize_string(src_path.name.split(".")[0]).upper() + "_JNT"
+
+                for i in range(num_strings):
+                    binf.seek(name_table + 6 + i * 4)
+                    string_offset = read_u16(binf)
+                    binf.seek(name_table + string_offset)
+
+                    joint_name = f"{out_enum_name}_{read_str(binf).upper()}_e"
+                    found_enums.append(ArcEnumValue(joint_name, i))
+                return ArcEnum(out_enum_name, found_enums)
+            binf.seek(next_chunk)
+    return None
+
+def extract_joint_enums(src_path:Path)->JointParsedEnums:
+    if ((SKIP_FILES_WITH_AT_SIGN and "@" in str(src_path)) or
+        (SKIP_STAGE_ARCS and "Stage" in src_path.parts) or
+        (SKIP_DEMO_ARCS and any(x.startswith("Demo") for x in src_path.parts))):
+        return JointParsedEnums([])
+
+    out_jnt_enums:list[ArcEnum] = []
+    internal_files = subprocess.run([DTK_PATH, "vfs", "ls", "-r", f"{src_path}:"], stdout=subprocess.PIPE, text=True).stdout
+    output_folder = Path(str(src_path).replace(".", "__"))
+    for line in internal_files.split("\n"):
+        parts = line.split(" | ")
+        if len(parts) != 3: continue
+
+        internal_file = parts[1].strip(" ")
+        internal_file_parts = internal_file.split(".")
+        if len(internal_file_parts) < 2: continue
+
+        extension = internal_file_parts[1]
+        if extension not in ("bmd", "bdl"): continue
+
+        internal_file_path = output_folder / internal_file
+
+        # extract the file from the archive if either:
+        # 1. the output file doesn't exist, or
+        # 2. the archive is newer than the output file (modded src)
+        if (not internal_file_path.exists() or
+            src_path.stat().st_mtime > internal_file_path.stat().st_mtime):
+            ensure_dir(internal_file_path.parent)
+            subprocess.run([DTK_PATH, "vfs", "cp", f"{src_path}:{internal_file}", internal_file_path], stdout=subprocess.PIPE)
+
+        # parse_bmd returns None for models that have no JNT1 chunk
+        out_enums = parse_bmd(internal_file_path)
+        if out_enums is not None:
+            out_jnt_enums.append(out_enums)
+
+    return JointParsedEnums(out_jnt_enums)
+
+def convert_binary_to_resource_enum(src_path: Path, dest_path: Path) -> None:
+    joint_enums = extract_joint_enums(src_path)
+
+    with src_path.open("rb") as binf:
+        opening_bytes = binf.read(4)
+        assert(opening_bytes == b"RARC"), f"Not a rarc file: starts with bytes {opening_bytes}"
+        skip_bytes(binf, 4)
+        data_header_offset = read_u32(binf)
+        binf.seek(data_header_offset)
+        node_count = read_u32(binf)
+        node_offset = read_u32(binf) + data_header_offset
+        total_num_file_entries = read_u32(binf)
+        file_entries_list_offset = read_u32(binf) + data_header_offset
+        skip_bytes(binf, 4)
+        string_list_offset = read_u32(binf) + data_header_offset
+        skip_bytes(binf, 2)
+        sync_flag = read_u8(binf)
+
+        found_files:list[ArcFile] = []
+        for entry_index in range(total_num_file_entries):
+            binf.seek(file_entries_list_offset + entry_index * 0x14)
+            file_id = read_u16(binf)
+            skip_bytes(binf, 2)
+            type_and_name_offset = read_u32(binf)
+            entry_type = type_and_name_offset >> 24
+            name_offset = type_and_name_offset & 0x00FFFFFF
+            binf.seek(string_list_offset + name_offset)
+            file_name = read_str(binf)
+            if entry_type & 1: # check to make sure it's a file, not a directory
+                assert(not sync_flag or file_id == entry_index), \
+                    f"Sync flag was set, but ID {file_id} does not match Index {entry_index} for file {file_name}"
+                found_files.append(ArcFile(file_name, entry_index, file_id))
+
+        index_file_lookup = {x.index : x for x in found_files}
+
+        found_nodes:list[tuple[ArcNode, list[ArcFile]]] = []
+
+        for node_index in range(node_count):
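+            # each node record is 0x10 bytes: a 4-byte type tag, a u32 name offset,
+            # two skipped bytes (presumably a name hash), a u16 file count, and a
+            # u32 index of the node's first file entry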
+            binf.seek(node_offset + node_index * 0x10)
+            this_node_type = bin_make_str(binf.read(4))
+            this_node_name_offs = read_u32(binf)
+            skip_bytes(binf, 2)
+            this_node_index_count = read_u16(binf)
+            this_node_first_index = read_u32(binf)
+            this_node_inds = range(this_node_first_index, this_node_first_index + this_node_index_count)
+            binf.seek(string_list_offset + this_node_name_offs)
+            this_node_name = read_str(binf)
+            this_node = ArcNode(this_node_type, this_node_name, this_node_inds)
+            found_nodes.append((this_node, [index_file_lookup[x] for x in this_node.inds if x in index_file_lookup]))
+
+        out_lines:list[str] = []
+        file_stem = src_path.name.split(".")[0]
+        file_stem_upper = sanitize_string(file_stem.upper())
+
+        out_lines.append(f"#ifndef RES_{file_stem_upper}_H")
+        out_lines.append(f"#define RES_{file_stem_upper}_H\n")
+
+        out_ids:list[str] = []
+        out_idxs:list[str] = []
+
+        # how many times each sanitized name has been seen, so duplicates get unique enumerators
+        appearance_count = defaultdict(int)
+
+        for node, files in found_nodes:
+            if len(files) == 0: continue
+            file_type_break = f"{INDENT}/* {node.node_type} */"
+            out_ids.append(file_type_break)
+            out_idxs.append(file_type_break)
+            for file in files:
+                parts = file.file_name.split(".")
+                sanitized_file_name = sanitize_string(parts[0].upper()) # sanitize the identifier
+
+                seen_count = appearance_count[sanitized_file_name]
+                appearance_count[sanitized_file_name] += 1
+
+                ext = ""
+                if len(parts) > 1:
+                    ext = sanitize_string(parts[1].upper())
+
+                duplicate_tag = "_"
+                if seen_count > 0:
+                    duplicate_tag = f"_{seen_count}_"
+
+                # tiny optimization to do less string formatting
+                begin_part = f"{INDENT}dRes_"
+                mid_part = f"_{file_stem_upper}_{ext}_{sanitized_file_name}{duplicate_tag}e=0x"
+                out_idxs.append(f"{begin_part}INDEX{mid_part}{file.index:X},")
+                out_ids.append(f"{begin_part}ID{mid_part}{file.id:X},")
+
+        out_lines.append(f"enum dRes_INDEX_{file_stem_upper} {{")
+        out_lines.extend(out_idxs)
+        out_lines.append("};\n")
+
+        out_lines.append(f"enum dRes_ID_{file_stem_upper} {{")
+        out_lines.extend(out_ids)
+        out_lines.append("};\n")
+
+        for joint_enum in joint_enums.enums:
+            out_lines.append(make_enum(joint_enum) + "\n")
+
+        out_lines.append(f"#endif /* !RES_{file_stem_upper}_H */")
+
+    out = "\n".join(out_lines)
+    ensure_dir(dest_path.parent)
+    with dest_path.open("w") as f:
+        f.write(out)
+
+def decompress_file(input_file:Path, output_file:Path) -> None:
+    subprocess.run([DTK_PATH, "yaz0", "decompress", input_file, "-o", output_file])
+
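+# convert a single .arc on disk into a generated header; the file may be stored
+# raw ("RARC" magic) or Yaz0-compressed ("Yaz0" magic), in which case it is first
+# decompressed through dtk and the decompressed copy is cached beside the source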
+def extract_enum_from_file(src_path:Path, dst_path:Path) -> None:
+    assert(src_path.exists())
+
+    # we can skip extracting this file if all of the following are true:
+    # 1. the output file exists
+    # 2. the src file is older than the output file (not modded)
+    # 3. this python file is older than the output file (no updates to how we extract enums)
+    if (dst_path.exists() and
+        src_path.stat().st_mtime < dst_path.stat().st_mtime and
+        THIS_MTIME < dst_path.stat().st_mtime):
+        return
+
+    # check the first bytes of the file
+    with src_path.open("rb") as f:
+        starting_bytes = f.read(4)
+
+    if starting_bytes == b"Yaz0": is_compressed = True
+    elif starting_bytes == b"RARC": is_compressed = False
+    # not an arc file, although it has the .arc extension
+    else: return
+
+    if is_compressed:
+        # the file is compressed, so decompress it, but only if either:
+        # 1. we've never decompressed this file before, or
+        # 2. the src file is newer than the output file (modded src)
+        new_src_path = src_path.with_suffix(src_path.suffix + ".decompressed")
+        if (not new_src_path.exists() or
+            new_src_path.stat().st_mtime < src_path.stat().st_mtime):
+            decompress_file(src_path, new_src_path)
+        src_path = new_src_path
+
+    convert_binary_to_resource_enum(src_path, dst_path)
+
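+# walk ./orig/<VERSION>/ and mirror every .arc found under a res folder into
+# OUT_PATH/<VERSION>/..., renamed to res_<name>.h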
+def main() -> None:
+    for dirname, _dirnames, filenames in walk("./orig/"):
+        dirpath = Path(dirname)
+        if "res" not in dirpath.parts: continue
+
+        for file in filenames:
+            file_path = dirpath / file
+            if file_path.suffix == ".arc":
+                # the version should be the second part of the path:
+                # ./orig/ShieldD/...
+                version = file_path.parts[1]
+                # find the res folder and truncate the path to the part after it
+                out_path = Path("/".join(file_path.parts[file_path.parts.index("res") + 1:]))
+                # output path = the designated output dir + the version + the file's hierarchy
+                out_path = Path(OUT_PATH) / version / out_path
+                # we're outputting a header file, so prefix it with "res_"
+                out_path = out_path.with_name("res_" + out_path.name).with_suffix(".h")
+                try:
+                    extract_enum_from_file(file_path, out_path)
+                except AssertionError as e:
+                    print(f"ERROR: {file_path} -> {out_path}\n{e}\n")
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/res_arc.py b/tools/res_arc.py
deleted file mode 100644
index f27a80c48bd..00000000000
--- a/tools/res_arc.py
+++ /dev/null
@@ -1,155 +0,0 @@
-#!/usr/bin/env python3
-
-from argparse import ArgumentParser
-
-from binary_funcs import read_bytes_until_null, read_u32, read_u16, read_u8, skip_bytes
-
-import subprocess
-from os.path import exists, getmtime
-from pathlib import Path
-from typing import NamedTuple
-
-class ArcFile(NamedTuple):
-    file_name:str
-    index:int
-    id:int
-
-class ArcNode(NamedTuple):
-    node_type:bytes
-    name:str
-    inds:range
-
-def convert_binary_to_resource_enum(src_path: str, dest_path: str) -> None:
-    with open(src_path, "rb") as binf:
-        assert(binf.read(4) == b"RARC"), "Not a rarc file"
-        skip_bytes(binf, 4)
-        data_header_offset = read_u32(binf)
-        binf.seek(data_header_offset)
-        node_count = read_u32(binf)
-        node_offset = read_u32(binf) + data_header_offset
-        total_num_file_entries = read_u32(binf)
-        file_entries_list_offset = read_u32(binf) + data_header_offset
-        skip_bytes(binf, 4)
-        string_list_offset = read_u32(binf) + data_header_offset
-        skip_bytes(binf, 2)
-        sync_flag = read_u8(binf)
-
-        found_files:list[ArcFile] = []
-
-        for entry_index in range(total_num_file_entries):
-            binf.seek(file_entries_list_offset + entry_index * 0x14)
-            file_id = read_u16(binf)
-            skip_bytes(binf, 2)
-            type_and_name_offset = read_u32(binf)
-            entry_type = type_and_name_offset >> 24
-            name_offset = type_and_name_offset & 0x00FFFFFF
-            binf.seek(string_list_offset + name_offset)
-            file_name = read_bytes_until_null(binf).decode("ascii")
-            if entry_type & 1: # check to make sure its a file
-                assert(not sync_flag or file_id == entry_index), \
-                    f"Sync flag was set, but ID {file_id} does not match Index {entry_index} for file {file_name}"
-                found_files.append(ArcFile(file_name, entry_index, file_id))
-        assert(len(set([x.file_name for x in found_files])) == len(found_files)), "duplicate file names found, unsupported"
-
-        index_file_lookup = {x.index : x for x in found_files}
-
-        found_nodes:list[tuple[ArcNode, list[ArcFile]]] = []
-
-        for node_index in range(node_count):
-            binf.seek(node_offset + node_index * 0x10)
-            this_node_type = binf.read(4).decode("ascii").strip(" ")
-            this_node_name_offs = read_u32(binf)
-            skip_bytes(binf, 2)
-            this_node_index_count = read_u16(binf)
-            this_node_first_index = read_u32(binf)
-            this_node_inds = range(this_node_first_index, this_node_first_index + this_node_index_count)
-            binf.seek(string_list_offset + this_node_name_offs)
-            this_node_name = read_bytes_until_null(binf).decode("ascii")
-            this_node = ArcNode(this_node_type, this_node_name, this_node_inds)
-            found_nodes.append((this_node, [index_file_lookup.get(x) for x in this_node.inds if x in index_file_lookup]))
-
-        out_lines:list[str] = []
-        file_stem = Path(src_path).name.split(".")[0]
-        file_stem_upper = file_stem.upper()
-        indent = "    "
-
-        out_lines.append(f"#ifndef RES_{file_stem_upper}_H")
-        out_lines.append(f"#define RES_{file_stem_upper}_H\n")
-
-        out_ids:list[str] = []
-        out_idxs:list[str] = []
-
-        for node, files in found_nodes:
-            if len(files) == 0: continue
-            file_type_break = f"{indent}/* {node.node_type} */"
-            out_ids.append(file_type_break)
-            out_idxs.append(file_type_break)
-            for file in files:
-                parts = file.file_name.split(".")
-                santitized_file_name = parts[0].upper()
-                ext = parts[1].upper()
-                # tiny optimization to do less string formatting
-                begin_part = f"{indent}dRes_"
-                mid_part = f"_{file_stem_upper}_{ext}_{santitized_file_name}_e=0x"
-                out_idxs.append(f"{begin_part}INDEX{mid_part}{file.index:X},")
-                out_ids.append(f"{begin_part}ID{mid_part}{file.id:X},")
-
-        out_lines.append(f"enum dRes_INDEX_{file_stem_upper} {{")
-        out_lines.extend(out_idxs)
-        out_lines.append("};\n")
-
-        out_lines.append(f"enum dRes_ID_{file_stem_upper} {{")
-        out_lines.extend(out_ids)
-        out_lines.append("};\n")
-
-        out_lines.append(f"#endif /* !RES_{file_stem_upper}_H */")
-
-        out = "\n".join(out_lines)
-
-        with open(dest_path, "w") as f:
-            f.write(out)
-
-
-def decompress_file(input_file:str, output_file:str) -> None:
-    # TODO: fix this path to be more flexible
-
-    # use pathlib to allow for unix+windows paths
-    dtk_path = str(Path("./build/tools/dtk.exe"))
-
-    subprocess.run([dtk_path, "yaz0", "decompress", input_file, "-o", output_file])
-
-def main() -> None:
-    parser = ArgumentParser(
-        description="TODO"
-    )
-    parser.add_argument("src_path", type=str, help="Binary source file path")
-    parser.add_argument("dest_path", type=str, help="Destination C include file path")
-    args = parser.parse_args()
-
-    src_path = args.src_path
-    dst_path = args.dest_path
-
-    assert(exists(src_path))
-
-    # if we have already made this file, skip
-    # check if the src_file is newer than the output file, means modded arc, worth updating
-    if (exists(dst_path)) and (getmtime(dst_path) > getmtime(src_path)):
-        return
-
-    with open(src_path, "rb") as f:
-        is_compressed = (f.read(4) == b"Yaz0")
-
-    if is_compressed:
-        # if our file is compressed, then we should decompress it
-        # but skip decompressing if it exists
-        # check for modded src tho
-        new_src_path = src_path + ".decompressed"
-        if (not exists(new_src_path)) or (getmtime(new_src_path) < getmtime(src_path)):
-            decompress_file(src_path, new_src_path)
-        src_path = new_src_path
-
-    convert_binary_to_resource_enum(src_path, dst_path)
-
-
-if __name__ == "__main__":
-    main()