Add ability to extract joint enums from bmd/bdl files (#2704)

* arc extraction will auto-create resource files

* Update enum extraction to extract joint enums

* Update enum extraction to use pathlib

* Move enum extraction to converters folder

* Added check to extract bmd file if src file is modded
roeming 2025-09-27 18:30:13 -04:00 committed by GitHub
parent 6242aa6e84
commit 3f37bad921
3 changed files with 325 additions and 167 deletions

binary_funcs.py

@@ -48,15 +48,14 @@ def read_f32(binary_file: BinaryIO) -> float:
return struct.unpack(">f", chunk)[0]
def read_bytes_until_null(binary_file: BinaryIO) -> bytes:
-    str_length = 0
-    while True:
-        char = binary_file.read(1)
-        if char == b"\0":
-            break
-        else:
-            str_length += 1
-    binary_file.seek(-(str_length+1), os.SEEK_CUR)
-    string = binary_file.read(str_length)
-    return string
+    begin = binary_file.tell()
+    while True:
+        b = binary_file.read(1)
+        if len(b) == 0:
+            raise EOFError("hit end of file before finding a null terminator")
+        if b[0] == 0:
+            break
+    str_length = binary_file.tell() - begin - 1
+    binary_file.seek(begin)
+    return binary_file.read(str_length)
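A quick sketch of the new behavior (the in-memory buffer and byte strings are illustrative; assumes binary_funcs is importable):

    import io
    from binary_funcs import read_bytes_until_null

    buf = io.BytesIO(b"cl_hat\x00rest")
    assert read_bytes_until_null(buf) == b"cl_hat"

    # unterminated input now raises instead of looping forever
    try:
        read_bytes_until_null(io.BytesIO(b"no terminator"))
    except EOFError:
        pass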

tools/converters/res_arc.py (new file, 314 lines)

@@ -0,0 +1,314 @@
#!/usr/bin/env python3
from binary_funcs import read_bytes_until_null, read_u32, read_u16, read_u8, skip_bytes
import subprocess
from os import walk, makedirs
from pathlib import Path
from typing import BinaryIO, NamedTuple
from collections import defaultdict
import re
DTK_PATH = str(Path("./build/tools/dtk.exe"))
OUT_PATH = Path("build/res")
INDENT = " " * 4
ADD_EXT_TO_ENUM = False
SKIP_STAGE_ARCS = True
SKIP_DEMO_ARCS = True
SKIP_FILES_WITH_AT_SIGN = True
# get when this file was modified last, used to detect if we should re-extract enums
THIS_MTIME = Path(__file__).stat().st_mtime
class ArcFile(NamedTuple):
file_name:str
index:int
id:int
class ArcNode(NamedTuple):
    node_type:str
name:str
inds:range
class ArcEnumValue(NamedTuple):
enum_value_name:str
value:int
class ArcEnum(NamedTuple):
enum_name:str
values:list[ArcEnumValue]
class JointParsedEnums(NamedTuple):
enums:list[ArcEnum]
def ensure_dir(path:Path)->None:
makedirs(path, exist_ok=True)
def bin_make_str(s:bytes)->str:
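    # swap the Shift-JIS full-width "x" (0x82 0x98) for an ASCII "x" so the
    # name can decode as ASCII (assumed reason for this byte pair appearing)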
s = s.replace(b"\x82\x98", b"x")
try:
s = s.decode("ascii")
except Exception as e:
raise AssertionError(f"{e}: {s}")
return s
def sanitize_string(s:str)->str:
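    # e.g. "cl.bmd" -> "cl_bmd", "boss@2" -> "boss_2" (hypothetical inputs)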
return re.sub(r"[\s@:\.,\-<>*%\"!&()|\+$]", "_", s)
def read_str(binf:BinaryIO)->str:
return bin_make_str(read_bytes_until_null(binf))
def make_enum(e:ArcEnum):
out = []
out.append(f"enum {e.enum_name} {{")
for v in e.values:
out.append(f"{INDENT}{v.enum_value_name}=0x{v.value:X},")
out.append("};")
out_enum = "\n".join(out)
return out_enum
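# For example (hypothetical names), make_enum(ArcEnum("CL_JNT",
# [ArcEnumValue("CL_JNT_CENTER_e", 0)])) would return:
#   enum CL_JNT {
#       CL_JNT_CENTER_e=0x0,
#   };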
def parse_bmd(src_path:Path):
with src_path.open("rb") as binf:
header_magic = binf.read(4)
known_J3D_magic = b"J3D2"
        assert(header_magic == known_J3D_magic), f"Attempted to parse {src_path} as bmd/bdl, but J3D header type doesn't match {known_J3D_magic}: {header_magic}"
bmd_magic = binf.read(4)
known_bmd_magics = (b"bmd3", b"bmd2", b"bdl4")
assert(bmd_magic in known_bmd_magics), f"Attempted to parse {src_path} as bmd/bdl, but bmd/bdl header type doesn't match any of {known_bmd_magics} : {bmd_magic}"
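        # header layout assumed here: after the two magics come a u32 file
        # size (skipped) and a u32 chunk count; the first chunk starts at 0x20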
skip_bytes(binf, 4)
chunk_count = read_u32(binf)
binf.seek(0x20)
for _ in range(chunk_count):
chunk_begin = binf.tell()
name = bin_make_str(binf.read(4))
size = read_u32(binf)
next_chunk = chunk_begin + size
if name == "JNT1":
skip_bytes(binf, 12)
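                # JNT1 layout assumed here: u16 joint count, u16 padding, u32
                # joint-data offset, u32 remap-table offset (the 12 bytes just
                # skipped), then the chunk-relative u32 name-table offset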
name_table = read_u32(binf) + chunk_begin
binf.seek(name_table)
num_strings = read_u16(binf)
found_enums = []
if ADD_EXT_TO_ENUM:
out_enum_name = sanitize_string(src_path.name.replace(".", "_")).upper() + "_JNT"
else:
out_enum_name = sanitize_string(src_path.name.split(".")[0]).upper() + "_JNT"
for i in range(num_strings):
binf.seek(name_table + 6 + i * 4)
string_offset = read_u16(binf)
binf.seek(name_table + string_offset)
joint_name = f"{out_enum_name}_{read_str(binf).upper()}_e"
found_enums.append(ArcEnumValue(joint_name, i))
# print(joint_name)
return ArcEnum(out_enum_name, found_enums)
binf.seek(next_chunk)
return None
def extract_joint_enums(src_path:Path):
if ((SKIP_FILES_WITH_AT_SIGN and "@" in str(src_path)) or
(SKIP_STAGE_ARCS and "Stage" in src_path.parts) or
(SKIP_DEMO_ARCS and any(x.startswith("Demo") for x in src_path.parts))):
return JointParsedEnums([])
out_jnt_enums:list[ArcEnum] = []
internal_files = subprocess.run([DTK_PATH, "vfs", "ls", "-r", f"{src_path}:"], stdout=subprocess.PIPE, text=True).stdout
output_folder = Path(str(src_path).replace(".", "__"))
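    # each listing line is assumed to hold three " | "-separated columns with
    # the internal file path in the middle (inferred from the parsing below)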
for line in internal_files.split("\n"):
parts = line.split(" | ")
if len(parts) != 3: continue
internal_file = parts[1].strip(" ")
internal_file_parts = internal_file.split(".")
if len(internal_file_parts) < 2: continue
extension = internal_file_parts[1]
if (extension not in ("bmd", "bdl")): continue
internal_file_path = output_folder / internal_file
# extract file from archive if either
# 1. output file doesn't exist
# 2. the archive file is newer than the output file (modded src)
if (not internal_file_path.exists() or
            src_path.stat().st_mtime > internal_file_path.stat().st_mtime):
ensure_dir(internal_file_path.parent)
subprocess.run([DTK_PATH, "vfs", "cp", f"{src_path}:{internal_file}", internal_file_path], stdout=subprocess.PIPE)
        out_enums = parse_bmd(internal_file_path)
        if out_enums is not None:
            out_jnt_enums.append(out_enums)
return JointParsedEnums(out_jnt_enums)
def convert_binary_to_resource_enum(src_path: Path, dest_path: Path) -> None:
joint_enums = extract_joint_enums(src_path)
with src_path.open("rb") as binf:
opening_bytes = binf.read(4)
assert(opening_bytes == b"RARC"), f"Not a rarc file: starts with bytes {opening_bytes}"
skip_bytes(binf, 4)
data_header_offset = read_u32(binf)
binf.seek(data_header_offset)
node_count = read_u32(binf)
node_offset = read_u32(binf) + data_header_offset
total_num_file_entries = read_u32(binf)
file_entries_list_offset = read_u32(binf) + data_header_offset
skip_bytes(binf, 4)
string_list_offset = read_u32(binf) + data_header_offset
skip_bytes(binf, 2)
sync_flag = read_u8(binf)
found_files:list[ArcFile] = []
all_file_names = []
for entry_index in range(total_num_file_entries):
binf.seek(file_entries_list_offset + entry_index * 0x14)
file_id = read_u16(binf)
skip_bytes(binf, 2)
type_and_name_offset = read_u32(binf)
entry_type = type_and_name_offset >> 24
name_offset = type_and_name_offset & 0x00FFFFFF
binf.seek(string_list_offset + name_offset)
file_name = read_str(binf)
            if entry_type & 1: # check to make sure it's a file
assert(not sync_flag or file_id == entry_index), \
f"Sync flag was set, but ID {file_id} does not match Index {entry_index} for file {file_name}"
found_files.append(ArcFile(file_name, entry_index, file_id))
all_file_names.append(file_name)
index_file_lookup = {x.index : x for x in found_files}
found_nodes:list[tuple[ArcNode, list[ArcFile]]] = []
for node_index in range(node_count):
binf.seek(node_offset + node_index * 0x10)
            this_node_type = bin_make_str(binf.read(4)).strip(" ") # node types are space-padded to 4 chars
this_node_name_offs = read_u32(binf)
skip_bytes(binf, 2)
this_node_index_count = read_u16(binf)
this_node_first_index = read_u32(binf)
this_node_inds = range(this_node_first_index, this_node_first_index + this_node_index_count)
binf.seek(string_list_offset + this_node_name_offs)
this_node_name = read_str(binf)
this_node = ArcNode(this_node_type, this_node_name, this_node_inds)
found_nodes.append((this_node, [index_file_lookup.get(x) for x in this_node.inds if x in index_file_lookup]))
out_lines:list[str] = []
file_stem = src_path.name.split(".")[0]
file_stem_upper = sanitize_string(file_stem.upper())
out_lines.append(f"#ifndef RES_{file_stem_upper}_H")
out_lines.append(f"#define RES_{file_stem_upper}_H\n")
out_ids:list[str] = []
out_idxs:list[str] = []
        appearance_count = defaultdict(int)
for node, files in found_nodes:
if len(files) == 0: continue
file_type_break = f"{INDENT}/* {node.node_type} */"
out_ids.append(file_type_break)
out_idxs.append(file_type_break)
for file in files:
parts = file.file_name.split(".")
                sanitized_file_name = sanitize_string(parts[0].upper()) # Sanitize identifier
                seen_count = appearance_count[sanitized_file_name]
                appearance_count[sanitized_file_name] += 1
ext = ""
if len(parts) > 1:
ext = sanitize_string(parts[1].upper())
duplicate_tag = "_"
if seen_count > 0:
duplicate_tag = f"_{seen_count}_"
# tiny optimization to do less string formatting
begin_part = f"{INDENT}dRes_"
                mid_part = f"_{file_stem_upper}_{ext}_{sanitized_file_name}{duplicate_tag}e=0x"
out_idxs.append(f"{begin_part}INDEX{mid_part}{file.index:X},")
out_ids.append(f"{begin_part}ID{mid_part}{file.id:X},")
out_lines.append(f"enum dRes_INDEX_{file_stem_upper} {{")
out_lines.extend(out_idxs)
out_lines.append("};\n")
out_lines.append(f"enum dRes_ID_{file_stem_upper} {{")
out_lines.extend(out_ids)
out_lines.append("};\n")
for joint_enum in joint_enums.enums:
out_lines.append(make_enum(joint_enum) + "\n")
out_lines.append(f"#endif /* !RES_{file_stem_upper}_H */")
out = "\n".join(out_lines)
ensure_dir(dest_path.parent)
with dest_path.open("w") as f:
f.write(out)
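# A generated header has roughly this shape (identifiers hypothetical):
#   #ifndef RES_CL_H
#   #define RES_CL_H
#   enum dRes_INDEX_CL { /* bmd */ dRes_INDEX_CL_BMD_CL_e=0x0, ... };
#   enum dRes_ID_CL { /* bmd */ dRes_ID_CL_BMD_CL_e=0x0, ... };
#   enum CL_JNT { CL_JNT_CENTER_e=0x0, ... };
#   #endif /* !RES_CL_H */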
def decompress_file(input_file:Path, output_file:Path) -> None:
# use pathlib to allow for unix+windows paths
subprocess.run([DTK_PATH, "yaz0", "decompress", input_file, "-o", output_file])
def extract_enum_from_file(src_path:Path, dst_path:Path) -> None:
assert(src_path.exists())
# we can skip extracting this file if all of the following are true
# 1. The output file exists
# 2. The src file is older than the output file (not modded)
# 3. This python file is older than the output file (no updates to how we extract enums)
if (dst_path.exists() and
src_path.stat().st_mtime < dst_path.stat().st_mtime and
THIS_MTIME < dst_path.stat().st_mtime):
return
# check the first bytes of the file
with src_path.open("rb") as f:
starting_bytes = f.read(4)
if starting_bytes == b"Yaz0": is_compressed = True
elif starting_bytes == b"RARC": is_compressed = False
        # not an arc file, even though it has the .arc extension
else: return
if is_compressed:
# if our file is compressed, then we should decompress it
# we only need to decompress if any of these are true
# 1. We've never decompressed this file before
# 2. The src file is newer than the output file (modded src)
new_src_path = src_path.with_suffix(src_path.suffix + ".decompressed")
if (not new_src_path.exists() or
new_src_path.stat().st_mtime < src_path.stat().st_mtime):
decompress_file(src_path, new_src_path)
src_path = new_src_path
convert_binary_to_resource_enum(src_path, dst_path)
def main() -> None:
for dir, dirnames, filenames in walk("./orig/"):
dirpath = Path(dir)
if "res" not in dirpath.parts: continue
for file in filenames:
file_path = dirpath / file
if file_path.suffix == ".arc":
# the version should be the second part of the path
# ./orig/ShieldD/...
version = file_path.parts[1]
# find the res folder, truncate the path to be the part after the res folder
out_path = Path("/".join(file_path.parts[file_path.parts.index("res") + 1:]))
                # set the output path to be the designated output + the version + the file's hierarchy
                out_path = OUT_PATH / version / out_path
# we're going to output to a header file, prefix it with "res_"
out_path = out_path.with_name("res_" + out_path.name).with_suffix(".h")
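                # e.g. ./orig/ShieldD/res/Object/cl.arc (hypothetical) becomes
                # build/res/ShieldD/Object/res_cl.h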
try:
extract_enum_from_file(file_path, out_path)
except AssertionError as e:
print(f"ERROR: {file_path} -> {out_path}\n{e}\n")
if __name__ == "__main__":
main()
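# Sketch of the intended invocation (assumes the repository root as the
# working directory so ./orig/ and the dtk binary resolve):
#   python tools/converters/res_arc.py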

tools/res_arc.py (deleted; superseded by tools/converters/res_arc.py)

@@ -1,155 +0,0 @@
#!/usr/bin/env python3
from argparse import ArgumentParser
from binary_funcs import read_bytes_until_null, read_u32, read_u16, read_u8, skip_bytes
import subprocess
from os.path import exists, getmtime
from pathlib import Path
from typing import NamedTuple
class ArcFile(NamedTuple):
file_name:str
index:int
id:int
class ArcNode(NamedTuple):
node_type:bytes
name:str
inds:range
def convert_binary_to_resource_enum(src_path: str, dest_path: str) -> None:
with open(src_path, "rb") as binf:
assert(binf.read(4) == b"RARC"), "Not a rarc file"
skip_bytes(binf, 4)
data_header_offset = read_u32(binf)
binf.seek(data_header_offset)
node_count = read_u32(binf)
node_offset = read_u32(binf) + data_header_offset
total_num_file_entries = read_u32(binf)
file_entries_list_offset = read_u32(binf) + data_header_offset
skip_bytes(binf, 4)
string_list_offset = read_u32(binf) + data_header_offset
skip_bytes(binf, 2)
sync_flag = read_u8(binf)
found_files:list[ArcFile] = []
for entry_index in range(total_num_file_entries):
binf.seek(file_entries_list_offset + entry_index * 0x14)
file_id = read_u16(binf)
skip_bytes(binf, 2)
type_and_name_offset = read_u32(binf)
entry_type = type_and_name_offset >> 24
name_offset = type_and_name_offset & 0x00FFFFFF
binf.seek(string_list_offset + name_offset)
file_name = read_bytes_until_null(binf).decode("ascii")
if entry_type & 1: # check to make sure its a file
assert(not sync_flag or file_id == entry_index), \
f"Sync flag was set, but ID {file_id} does not match Index {entry_index} for file {file_name}"
found_files.append(ArcFile(file_name, entry_index, file_id))
assert(len(set([x.file_name for x in found_files])) == len(found_files)), "duplicate file names found, unsupported"
index_file_lookup = {x.index : x for x in found_files}
found_nodes:list[tuple[ArcNode, list[ArcFile]]] = []
for node_index in range(node_count):
binf.seek(node_offset + node_index * 0x10)
this_node_type = binf.read(4).decode("ascii").strip(" ")
this_node_name_offs = read_u32(binf)
skip_bytes(binf, 2)
this_node_index_count = read_u16(binf)
this_node_first_index = read_u32(binf)
this_node_inds = range(this_node_first_index, this_node_first_index + this_node_index_count)
binf.seek(string_list_offset + this_node_name_offs)
this_node_name = read_bytes_until_null(binf).decode("ascii")
this_node = ArcNode(this_node_type, this_node_name, this_node_inds)
found_nodes.append((this_node, [index_file_lookup.get(x) for x in this_node.inds if x in index_file_lookup]))
out_lines:list[str] = []
file_stem = Path(src_path).name.split(".")[0]
file_stem_upper = file_stem.upper()
indent = " "
out_lines.append(f"#ifndef RES_{file_stem_upper}_H")
out_lines.append(f"#define RES_{file_stem_upper}_H\n")
out_ids:list[str] = []
out_idxs:list[str] = []
for node, files in found_nodes:
if len(files) == 0: continue
file_type_break = f"{indent}/* {node.node_type} */"
out_ids.append(file_type_break)
out_idxs.append(file_type_break)
for file in files:
parts = file.file_name.split(".")
santitized_file_name = parts[0].upper()
ext = parts[1].upper()
# tiny optimization to do less string formatting
begin_part = f"{indent}dRes_"
mid_part = f"_{file_stem_upper}_{ext}_{santitized_file_name}_e=0x"
out_idxs.append(f"{begin_part}INDEX{mid_part}{file.index:X},")
out_ids.append(f"{begin_part}ID{mid_part}{file.id:X},")
out_lines.append(f"enum dRes_INDEX_{file_stem_upper} {{")
out_lines.extend(out_idxs)
out_lines.append("};\n")
out_lines.append(f"enum dRes_ID_{file_stem_upper} {{")
out_lines.extend(out_ids)
out_lines.append("};\n")
out_lines.append(f"#endif /* !RES_{file_stem_upper}_H */")
out = "\n".join(out_lines)
with open(dest_path, "w") as f:
f.write(out)
def decompress_file(input_file:str, output_file:str) -> None:
# TODO: fix this path to be more flexible
# use pathlib to allow for unix+windows paths
dtk_path = str(Path("./build/tools/dtk.exe"))
subprocess.run([dtk_path, "yaz0", "decompress", input_file, "-o", output_file])
def main() -> None:
parser = ArgumentParser(
description="TODO"
)
parser.add_argument("src_path", type=str, help="Binary source file path")
parser.add_argument("dest_path", type=str, help="Destination C include file path")
args = parser.parse_args()
src_path = args.src_path
dst_path = args.dest_path
assert(exists(src_path))
# if we have already made this file, skip
# check if the src_file is newer than the output file, means modded arc, worth updating
if (exists(dst_path)) and (getmtime(dst_path) > getmtime(src_path)):
return
with open(src_path, "rb") as f:
is_compressed = (f.read(4) == b"Yaz0")
if is_compressed:
# if our file is compressed, then we should decompress it
# but skip decompressing if it exists
# check for modded src tho
new_src_path = src_path + ".decompressed"
if (not exists(new_src_path)) or (getmtime(new_src_path) < getmtime(src_path)):
decompress_file(src_path, new_src_path)
src_path = new_src_path
convert_binary_to_resource_enum(src_path, dst_path)
if __name__ == "__main__":
main()