mirror of https://github.com/zeldaret/tp.git
Add ability to extract joint enums from bmd/bdl files (#2704)
* arc extraction will auto-create resource files
* Update enum extraction to extract joint enums
* Update enum extraction to use pathlib
* Move enum extraction to converters folder
* Added check to extract bmd file if src file is modded
parent 6242aa6e84
commit 3f37bad921
@@ -48,15 +48,14 @@ def read_f32(binary_file: BinaryIO) -> float:
     return struct.unpack(">f", chunk)[0]
 
 def read_bytes_until_null(binary_file: BinaryIO) -> bytes:
-    str_length = 0
-    while True:
-        char = binary_file.read(1)
-        if char == b"\0":
-            break
-        else:
-            str_length += 1
-
-    binary_file.seek(-(str_length+1), os.SEEK_CUR)
-    string = binary_file.read(str_length)
-
-    return string
+    begin = binary_file.tell()
+    while True:
+        b = binary_file.read(1)
+        if len(b) == 0:
+            raise EOFError()
+        if b[0] == 0:
+            break
+
+    str_length = binary_file.tell() - begin - 1
+    binary_file.seek(begin)
+    return binary_file.read(str_length)
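
A minimal sketch of the new behavior, runnable with io.BytesIO standing in for a real file: the rewritten reader returns the bytes before the first NUL, and raises EOFError if the stream ends without one; the old version's read(1) would keep returning b"" at end-of-file, so it never terminated.

    import io

    buf = io.BytesIO(b"bmwr.bdl\0trailing data")  # hypothetical name-table bytes
    assert read_bytes_until_null(buf) == b"bmwr.bdl"

    try:
        read_bytes_until_null(io.BytesIO(b"no terminator"))
    except EOFError:
        pass  # the old implementation would have looped forever here
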
@@ -0,0 +1,314 @@
#!/usr/bin/env python3

from binary_funcs import read_bytes_until_null, read_u32, read_u16, read_u8, skip_bytes
import subprocess
from collections import defaultdict
from os import walk, makedirs
from pathlib import Path
from typing import NamedTuple
import re

DTK_PATH = str(Path("./build/tools/dtk.exe"))
OUT_PATH = "build/res"
INDENT = " " * 4
ADD_EXT_TO_ENUM = False
SKIP_STAGE_ARCS = True
SKIP_DEMO_ARCS = True
SKIP_FILES_WITH_AT_SIGN = True
# record when this file was last modified, used to detect if we should re-extract enums
THIS_MTIME = Path(__file__).stat().st_mtime

class ArcFile(NamedTuple):
    file_name: str
    index: int
    id: int


class ArcNode(NamedTuple):
    node_type: str  # decoded via bin_make_str, so str rather than bytes
    name: str
    inds: range


class ArcEnumValue(NamedTuple):
    enum_value_name: str
    value: int


class ArcEnum(NamedTuple):
    enum_name: str
    values: list[ArcEnumValue]


class JointParsedEnums(NamedTuple):
    enums: list[ArcEnum]

def ensure_dir(path: Path) -> None:
    makedirs(path, exist_ok=True)


def bin_make_str(s: bytes) -> str:
    # swap the Shift-JIS full-width "x" (0x82 0x98) for ASCII so the decode below succeeds
    s = s.replace(b"\x82\x98", b"x")
    try:
        s = s.decode("ascii")
    except Exception as e:
        raise AssertionError(f"{e}: {s}")
    return s


def sanitize_string(s: str) -> str:
    return re.sub(r"[\s@:\.,\-<>*%\"!&()|\+$]", "_", s)


def read_str(binf) -> str:
    return bin_make_str(read_bytes_until_null(binf))

def make_enum(e: ArcEnum):
    out = []
    out.append(f"enum {e.enum_name} {{")
    for v in e.values:
        out.append(f"{INDENT}{v.enum_value_name}=0x{v.value:X},")
    out.append("};")

    out_enum = "\n".join(out)

    return out_enum

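For illustration, feeding make_enum a hand-built ArcEnum (names hypothetical) shows the exact C text it emits:

    demo = ArcEnum("BMWR_JNT", [ArcEnumValue("BMWR_JNT_CENTER_e", 0),
                                ArcEnumValue("BMWR_JNT_BACK_e", 1)])
    print(make_enum(demo))
    # enum BMWR_JNT {
    #     BMWR_JNT_CENTER_e=0x0,
    #     BMWR_JNT_BACK_e=0x1,
    # };
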
def parse_bmd(src_path: Path):
    global ADD_EXT_TO_ENUM
    with src_path.open("rb") as binf:
        header_magic = binf.read(4)
        known_J3D_magic = b"J3D2"
        assert(header_magic == known_J3D_magic), f"Attempted to parse {src_path} as bmd/bdl, but J3D header type doesn't match {known_J3D_magic} : {header_magic}"
        bmd_magic = binf.read(4)
        known_bmd_magics = (b"bmd3", b"bmd2", b"bdl4")
        assert(bmd_magic in known_bmd_magics), f"Attempted to parse {src_path} as bmd/bdl, but bmd/bdl header type doesn't match any of {known_bmd_magics} : {bmd_magic}"
        skip_bytes(binf, 4)
        chunk_count = read_u32(binf)
        binf.seek(0x20)
        for _ in range(chunk_count):
            chunk_begin = binf.tell()
            name = bin_make_str(binf.read(4))
            size = read_u32(binf)
            next_chunk = chunk_begin + size
            if name == "JNT1":
                skip_bytes(binf, 12)
                name_table = read_u32(binf) + chunk_begin
                binf.seek(name_table)
                num_strings = read_u16(binf)
                found_enums = []
                if ADD_EXT_TO_ENUM:
                    out_enum_name = sanitize_string(src_path.name.replace(".", "_")).upper() + "_JNT"
                else:
                    out_enum_name = sanitize_string(src_path.name.split(".")[0]).upper() + "_JNT"

                for i in range(num_strings):
                    # string-table entries are 4 bytes each (hash u16 + offset u16) and
                    # start 4 bytes in, so entry i's offset field sits at +6 + i*4
                    binf.seek(name_table + 6 + i * 4)
                    string_offset = read_u16(binf)
                    binf.seek(name_table + string_offset)

                    joint_name = f"{out_enum_name}_{read_str(binf).upper()}_e"
                    found_enums.append(ArcEnumValue(joint_name, i))
                return ArcEnum(out_enum_name, found_enums)
            binf.seek(next_chunk)
    return None

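A quick way to exercise the parser on a single model, assuming a hypothetical path to a .bdl that dtk has already copied out of an archive:

    model = Path("Object__arc/bmwr.bdl")  # hypothetical extracted model path
    jnt_enum = parse_bmd(model)
    if jnt_enum is not None:  # None means the model had no JNT1 chunk
        print(make_enum(jnt_enum))  # emits "enum BMWR_JNT { ... };"
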
def extract_joint_enums(src_path: Path):
    if ((SKIP_FILES_WITH_AT_SIGN and "@" in str(src_path)) or
        (SKIP_STAGE_ARCS and "Stage" in src_path.parts) or
        (SKIP_DEMO_ARCS and any(x.startswith("Demo") for x in src_path.parts))):
        return JointParsedEnums([])

    out_jnt_enums: list[ArcEnum] = []
    internal_files = subprocess.run([DTK_PATH, "vfs", "ls", "-r", f"{src_path}:"], stdout=subprocess.PIPE, text=True).stdout
    output_folder = Path(str(src_path).replace(".", "__"))
    for line in internal_files.split("\n"):
        parts = line.split(" | ")
        if len(parts) != 3: continue

        internal_file = parts[1].strip(" ")
        internal_file_parts = internal_file.split(".")
        if len(internal_file_parts) < 2: continue

        extension = internal_file_parts[1]
        if extension not in ("bmd", "bdl"): continue

        internal_file_path = output_folder / internal_file

        # extract the file from the archive if either
        # 1. the output file doesn't exist
        # 2. the archive file is newer than the output file (modded src)
        if (not internal_file_path.exists() or
            src_path.stat().st_mtime > internal_file_path.stat().st_mtime):
            ensure_dir(internal_file_path.parent)
            subprocess.run([DTK_PATH, "vfs", "cp", f"{src_path}:{internal_file}", internal_file_path], stdout=subprocess.PIPE)

        out_enums = parse_bmd(internal_file_path)
        # parse_bmd returns None for models without a JNT1 chunk
        if out_enums is not None:
            out_jnt_enums.append(out_enums)

    return JointParsedEnums(out_jnt_enums)

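The loop above encodes an assumption about dtk's listing format: each useful line of dtk vfs ls -r splits into exactly three " | "-separated columns with the internal path in the middle. The column contents here are hypothetical, but the parsing contract is visible in the code:

    line = "1,234 bytes | bmwr.bdl | Yaz0 compressed"  # hypothetical dtk output line
    parts = line.split(" | ")
    assert len(parts) == 3
    assert parts[1].strip(" ") == "bmwr.bdl"
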
def convert_binary_to_resource_enum(src_path: Path, dest_path: Path) -> None:
    joint_enums = extract_joint_enums(src_path)

    with src_path.open("rb") as binf:
        opening_bytes = binf.read(4)
        assert(opening_bytes == b"RARC"), f"Not a rarc file: starts with bytes {opening_bytes}"
        skip_bytes(binf, 4)
        data_header_offset = read_u32(binf)
        binf.seek(data_header_offset)
        node_count = read_u32(binf)
        node_offset = read_u32(binf) + data_header_offset
        total_num_file_entries = read_u32(binf)
        file_entries_list_offset = read_u32(binf) + data_header_offset
        skip_bytes(binf, 4)
        string_list_offset = read_u32(binf) + data_header_offset
        skip_bytes(binf, 2)
        sync_flag = read_u8(binf)
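        # Layout recap, as implied by the reads above: the u32 at offset 0x8
        # locates the data header; relative to the data header come the node
        # count/offset, the file-entry count/offset, the string-table offset,
        # and a sync flag that forces file IDs to equal file indices.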

        found_files: list[ArcFile] = []
        all_file_names = []
        for entry_index in range(total_num_file_entries):
            binf.seek(file_entries_list_offset + entry_index * 0x14)
            file_id = read_u16(binf)
            skip_bytes(binf, 2)
            type_and_name_offset = read_u32(binf)
            entry_type = type_and_name_offset >> 24
            name_offset = type_and_name_offset & 0x00FFFFFF
            binf.seek(string_list_offset + name_offset)
            file_name = read_str(binf)
            if entry_type & 1:  # check to make sure it's a file, not a directory
                assert(not sync_flag or file_id == entry_index), \
                    f"Sync flag was set, but ID {file_id} does not match Index {entry_index} for file {file_name}"
                found_files.append(ArcFile(file_name, entry_index, file_id))
                all_file_names.append(file_name)

        index_file_lookup = {x.index: x for x in found_files}

        found_nodes: list[tuple[ArcNode, list[ArcFile]]] = []

        for node_index in range(node_count):
            binf.seek(node_offset + node_index * 0x10)
            this_node_type = bin_make_str(binf.read(4))
            this_node_name_offs = read_u32(binf)
            skip_bytes(binf, 2)
            this_node_index_count = read_u16(binf)
            this_node_first_index = read_u32(binf)
            this_node_inds = range(this_node_first_index, this_node_first_index + this_node_index_count)
            binf.seek(string_list_offset + this_node_name_offs)
            this_node_name = read_str(binf)
            this_node = ArcNode(this_node_type, this_node_name, this_node_inds)
            found_nodes.append((this_node, [index_file_lookup.get(x) for x in this_node.inds if x in index_file_lookup]))

        out_lines: list[str] = []
        file_stem = src_path.name.split(".")[0]
        file_stem_upper = sanitize_string(file_stem.upper())

        out_lines.append(f"#ifndef RES_{file_stem_upper}_H")
        out_lines.append(f"#define RES_{file_stem_upper}_H\n")

        out_ids: list[str] = []
        out_idxs: list[str] = []

        # count how often each sanitized name appears so duplicates get numbered
        appearance_count = defaultdict(int)

        for node, files in found_nodes:
            if len(files) == 0: continue
            file_type_break = f"{INDENT}/* {node.node_type} */"
            out_ids.append(file_type_break)
            out_idxs.append(file_type_break)
            for file in files:
                parts = file.file_name.split(".")
                sanitized_file_name = sanitize_string(parts[0].upper())  # sanitize the identifier

                seen_count = appearance_count[sanitized_file_name]
                appearance_count[sanitized_file_name] += 1

                ext = ""
                if len(parts) > 1:
                    ext = sanitize_string(parts[1].upper())

                duplicate_tag = "_"
                if seen_count > 0:
                    duplicate_tag = f"_{seen_count}_"

                # tiny optimization to do less string formatting
                begin_part = f"{INDENT}dRes_"
                mid_part = f"_{file_stem_upper}_{ext}_{sanitized_file_name}{duplicate_tag}e=0x"
                out_idxs.append(f"{begin_part}INDEX{mid_part}{file.index:X},")
                out_ids.append(f"{begin_part}ID{mid_part}{file.id:X},")

        out_lines.append(f"enum dRes_INDEX_{file_stem_upper} {{")
        out_lines.extend(out_idxs)
        out_lines.append("};\n")

        out_lines.append(f"enum dRes_ID_{file_stem_upper} {{")
        out_lines.extend(out_ids)
        out_lines.append("};\n")

        for joint_enum in joint_enums.enums:
            out_lines.append(make_enum(joint_enum) + "\n")

        out_lines.append(f"#endif /* !RES_{file_stem_upper}_H */")

    out = "\n".join(out_lines)
    ensure_dir(dest_path.parent)
    with dest_path.open("w") as f:
        f.write(out)

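Put together, the header generated for a hypothetical Foo.arc holding a single bmwr.bdl with one joint would come out roughly like this (all names illustrative):

    #ifndef RES_FOO_H
    #define RES_FOO_H

    enum dRes_INDEX_FOO {
        /* bdl */
        dRes_INDEX_FOO_BDL_BMWR_e=0x0,
    };

    enum dRes_ID_FOO {
        /* bdl */
        dRes_ID_FOO_BDL_BMWR_e=0x0,
    };

    enum BMWR_JNT {
        BMWR_JNT_CENTER_e=0x0,
    };

    #endif /* !RES_FOO_H */
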
def decompress_file(input_file: Path, output_file: Path) -> None:
    # use pathlib to allow for unix+windows paths
    subprocess.run([DTK_PATH, "yaz0", "decompress", input_file, "-o", output_file])

def extract_enum_from_file(src_path: Path, dst_path: Path) -> None:
    assert(src_path.exists())

    # we can skip extracting this file if all of the following are true:
    # 1. the output file exists
    # 2. the src file is older than the output file (not modded)
    # 3. this python file is older than the output file (no updates to how we extract enums)
    if (dst_path.exists() and
        src_path.stat().st_mtime < dst_path.stat().st_mtime and
        THIS_MTIME < dst_path.stat().st_mtime):
        return

    # check the first bytes of the file
    with src_path.open("rb") as f:
        starting_bytes = f.read(4)

    if starting_bytes == b"Yaz0": is_compressed = True
    elif starting_bytes == b"RARC": is_compressed = False
    # not an arc file although it has the .arc extension
    else: return

    if is_compressed:
        # if our file is compressed, then we should decompress it,
        # but we only need to decompress if any of these are true:
        # 1. we've never decompressed this file before
        # 2. the src file is newer than the output file (modded src)
        new_src_path = src_path.with_suffix(src_path.suffix + ".decompressed")
        if (not new_src_path.exists() or
            new_src_path.stat().st_mtime < src_path.stat().st_mtime):
            decompress_file(src_path, new_src_path)
        src_path = new_src_path

    convert_binary_to_resource_enum(src_path, dst_path)

def main() -> None:
    for dir, dirnames, filenames in walk("./orig/"):
        dirpath = Path(dir)
        if "res" not in dirpath.parts: continue

        for file in filenames:
            file_path = dirpath / file
            if file_path.suffix == ".arc":
                # the version should be the second part of the path:
                # ./orig/ShieldD/...
                version = file_path.parts[1]
                # find the res folder, truncate the path to the part after the res folder
                out_path = Path("/".join(file_path.parts[file_path.parts.index("res") + 1:]))
                # set the output path to the designated output + the version + the file's hierarchy
                out_path = Path(OUT_PATH) / version / out_path
                # we're going to output to a header file, so prefix it with "res_"
                out_path = out_path.with_name("res_" + out_path.name).with_suffix(".h")
                try:
                    extract_enum_from_file(file_path, out_path)
                except AssertionError as e:
                    print(f"ERROR: {file_path} -> {out_path}\n{e}\n")


if __name__ == "__main__":
    main()
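
To make the path arithmetic concrete, here is how one archive maps through main(), using the ShieldD version from the comment and a hypothetical file name:

    ./orig/ShieldD/res/Object/Foo.arc
        version            -> "ShieldD"
        path after "res"   -> Object/Foo.arc
        final output       -> build/res/ShieldD/Object/res_Foo.h
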
tools/res_arc.py (155 lines deleted)
@@ -1,155 +0,0 @@
#!/usr/bin/env python3

from argparse import ArgumentParser

from binary_funcs import read_bytes_until_null, read_u32, read_u16, read_u8, skip_bytes

import subprocess
from os.path import exists, getmtime
from pathlib import Path
from typing import NamedTuple

class ArcFile(NamedTuple):
    file_name:str
    index:int
    id:int

class ArcNode(NamedTuple):
    node_type:bytes
    name:str
    inds:range

def convert_binary_to_resource_enum(src_path: str, dest_path: str) -> None:
    with open(src_path, "rb") as binf:
        assert(binf.read(4) == b"RARC"), "Not a rarc file"
        skip_bytes(binf, 4)
        data_header_offset = read_u32(binf)
        binf.seek(data_header_offset)
        node_count = read_u32(binf)
        node_offset = read_u32(binf) + data_header_offset
        total_num_file_entries = read_u32(binf)
        file_entries_list_offset = read_u32(binf) + data_header_offset
        skip_bytes(binf, 4)
        string_list_offset = read_u32(binf) + data_header_offset
        skip_bytes(binf, 2)
        sync_flag = read_u8(binf)

        found_files:list[ArcFile] = []

        for entry_index in range(total_num_file_entries):
            binf.seek(file_entries_list_offset + entry_index * 0x14)
            file_id = read_u16(binf)
            skip_bytes(binf, 2)
            type_and_name_offset = read_u32(binf)
            entry_type = type_and_name_offset >> 24
            name_offset = type_and_name_offset & 0x00FFFFFF
            binf.seek(string_list_offset + name_offset)
            file_name = read_bytes_until_null(binf).decode("ascii")
            if entry_type & 1: # check to make sure its a file
                assert(not sync_flag or file_id == entry_index), \
                    f"Sync flag was set, but ID {file_id} does not match Index {entry_index} for file {file_name}"
                found_files.append(ArcFile(file_name, entry_index, file_id))
        assert(len(set([x.file_name for x in found_files])) == len(found_files)), "duplicate file names found, unsupported"

        index_file_lookup = {x.index : x for x in found_files}

        found_nodes:list[tuple[ArcNode, list[ArcFile]]] = []

        for node_index in range(node_count):
            binf.seek(node_offset + node_index * 0x10)
            this_node_type = binf.read(4).decode("ascii").strip(" ")
            this_node_name_offs = read_u32(binf)
            skip_bytes(binf, 2)
            this_node_index_count = read_u16(binf)
            this_node_first_index = read_u32(binf)
            this_node_inds = range(this_node_first_index, this_node_first_index + this_node_index_count)
            binf.seek(string_list_offset + this_node_name_offs)
            this_node_name = read_bytes_until_null(binf).decode("ascii")
            this_node = ArcNode(this_node_type, this_node_name, this_node_inds)
            found_nodes.append((this_node, [index_file_lookup.get(x) for x in this_node.inds if x in index_file_lookup]))

        out_lines:list[str] = []
        file_stem = Path(src_path).name.split(".")[0]
        file_stem_upper = file_stem.upper()
        indent = "    "

        out_lines.append(f"#ifndef RES_{file_stem_upper}_H")
        out_lines.append(f"#define RES_{file_stem_upper}_H\n")

        out_ids:list[str] = []
        out_idxs:list[str] = []

        for node, files in found_nodes:
            if len(files) == 0: continue
            file_type_break = f"{indent}/* {node.node_type} */"
            out_ids.append(file_type_break)
            out_idxs.append(file_type_break)
            for file in files:
                parts = file.file_name.split(".")
                santitized_file_name = parts[0].upper()
                ext = parts[1].upper()
                # tiny optimization to do less string formatting
                begin_part = f"{indent}dRes_"
                mid_part = f"_{file_stem_upper}_{ext}_{santitized_file_name}_e=0x"
                out_idxs.append(f"{begin_part}INDEX{mid_part}{file.index:X},")
                out_ids.append(f"{begin_part}ID{mid_part}{file.id:X},")

        out_lines.append(f"enum dRes_INDEX_{file_stem_upper} {{")
        out_lines.extend(out_idxs)
        out_lines.append("};\n")

        out_lines.append(f"enum dRes_ID_{file_stem_upper} {{")
        out_lines.extend(out_ids)
        out_lines.append("};\n")

        out_lines.append(f"#endif /* !RES_{file_stem_upper}_H */")

    out = "\n".join(out_lines)

    with open(dest_path, "w") as f:
        f.write(out)


def decompress_file(input_file:str, output_file:str) -> None:
    # TODO: fix this path to be more flexible

    # use pathlib to allow for unix+windows paths
    dtk_path = str(Path("./build/tools/dtk.exe"))

    subprocess.run([dtk_path, "yaz0", "decompress", input_file, "-o", output_file])


def main() -> None:
    parser = ArgumentParser(
        description="TODO"
    )
    parser.add_argument("src_path", type=str, help="Binary source file path")
    parser.add_argument("dest_path", type=str, help="Destination C include file path")
    args = parser.parse_args()

    src_path = args.src_path
    dst_path = args.dest_path

    assert(exists(src_path))

    # if we have already made this file, skip
    # check if the src_file is newer than the output file, means modded arc, worth updating
    if (exists(dst_path)) and (getmtime(dst_path) > getmtime(src_path)):
        return

    with open(src_path, "rb") as f:
        is_compressed = (f.read(4) == b"Yaz0")

    if is_compressed:
        # if our file is compressed, then we should decompress it
        # but skip decompressing if it exists
        # check for modded src tho
        new_src_path = src_path + ".decompressed"
        if (not exists(new_src_path)) or (getmtime(new_src_path) < getmtime(src_path)):
            decompress_file(src_path, new_src_path)
        src_path = new_src_path

    convert_binary_to_resource_enum(src_path, dst_path)


if __name__ == "__main__":
    main()