mm/tools/buildtools/makeyar.py

179 lines
5.8 KiB
Python
Executable File

#!/usr/bin/env python3
# SPDX-FileCopyrightText: © 2023-2024 ZeldaRET
# SPDX-License-Identifier: MIT
# Program to generate compressed yar (Yaz0 ARchive) files.
#
# The program expects an .o elf file and outputs a raw yar binary file and a
# "symbols" elf.
#
# A yar file consists of multiple Yaz0 files compressed individually. The
# archive begins with a header of non-fixed size, which describes the
# location of each individual Yaz0 block within the archive itself. This
# header is followed by each Yaz0 file.
#
# The first word (a 4-byte group) of the header indicates the size in bytes of
# the header itself, which is also the offset of the first Yaz0 block. The
# remaining words hold offsets relative to the end of the header: one for each
# Yaz0 block after the first (whose relative offset, 0, is omitted), followed
# by a final word holding the end offset of the last block.
#
# Each Yaz0 block is 0xFF-padded to a multiple of 0x10 in size.
#
# The entire archive is 0-padded to a multiple of 0x10 in size.
#
# The program works by compressing each .data symbol in the input elf file as
# its own Yaz0 compressed file, appending them in order for the generated
# archive. Other elf sections are ignored for the resulting yar file.
#
# The program also outputs an elf file that's identical to the elf input,
# but with its .data section zeroed out completely. This "symbols" elf can be
# used for referencing each symbol as if the whole file were completely
# uncompressed.
from __future__ import annotations
import argparse
import dataclasses
from pathlib import Path
import struct
import crunch64
from elftools.elf.elffile import ELFFile
from elftools.elf.sections import SymbolTableSection
def write_word_as_bytes(buff: bytearray, offset: int, word: int) -> None:
    """Store ``word`` in ``buff`` as a big-endian unsigned 32-bit integer.

    The four bytes starting at ``offset`` are overwritten in place; ``buff``
    is never resized.

    Args:
        buff: Destination buffer, modified in place.
        offset: Byte position in ``buff`` where the word starts.
        word: Value to store; must fit in an unsigned 32-bit integer.

    Raises:
        struct.error: If ``word`` is out of range or the four bytes do not
            fit inside ``buff`` at ``offset``.
    """
    # Plain string literal: the format has no placeholders to interpolate.
    struct.pack_into(">I", buff, offset, word)
@dataclasses.dataclass
class Symbol:
    """An object symbol read from the symbol table of the input elf."""

    # Symbol name as stored in the symbol table.
    name: str
    # The symbol's ``st_value``; used as a byte offset into the .data contents.
    offset: int
    # The symbol's ``st_size``, in bytes.
    size: int
def get_data_from_elf(elf_path: Path) -> tuple[bytearray, list[Symbol], int]:
    """Read the .data contents and the object symbols from an elf file.

    Args:
        elf_path: Path of the elf file to read.

    Returns:
        A ``(data, symbols, data_offset)`` tuple: the raw bytes of the .data
        section, one ``Symbol`` per defined ``STT_OBJECT`` entry of .symtab,
        and the file offset of the .data section (``-1`` when the file has no
        .data section).
    """
    data_bytes = bytearray()
    found_symbols: list[Symbol] = []
    data_file_offset = -1

    with elf_path.open("rb") as elf_stream:
        elf = ELFFile(elf_stream)

        for section in elf.iter_sections():
            if section.name == ".data":
                # Only a single .data section is supported.
                assert len(data_bytes) == 0
                data_bytes += section.data()
                assert len(data_bytes) == section["sh_size"]
                data_file_offset = section["sh_offset"]
            elif section.name == ".symtab":
                assert isinstance(section, SymbolTableSection)
                # Keep defined object symbols only; undefined symbols and
                # every other symbol type are skipped.
                found_symbols.extend(
                    Symbol(sym.name, sym["st_value"], sym["st_size"])
                    for sym in section.iter_symbols()
                    if sym["st_shndx"] != "SHN_UNDEF"
                    and sym["st_info"]["type"] == "STT_OBJECT"
                )

    return data_bytes, found_symbols, data_file_offset
def align_16(val: int) -> int:
    """Round ``val`` up to the nearest multiple of 0x10."""
    # -val % 0x10 is the distance from val up to the next 0x10 boundary
    # (zero when val is already aligned).
    return val + (-val % 0x10)
def create_archive(
    uncompressed_data: bytearray, symbol_list: list[Symbol]
) -> bytearray:
    """Build a yar archive holding one Yaz0 block per symbol.

    The archive starts with a header of ``len(symbol_list) + 1`` big-endian
    words: the header size, then the offset of every block after the first
    (relative to the end of the header), then the end offset of the last
    block. Each symbol's bytes are zero-padded to a multiple of 0x10 before
    compression, each compressed block is 0xFF-padded to a multiple of 0x10,
    and the whole archive is zero-padded to a multiple of 0x10.

    Args:
        uncompressed_data: Raw bytes the symbols refer to; not modified.
        symbol_list: Symbols to compress, in archive order. Expected to be
            non-empty.

    Returns:
        The complete archive.

    Raises:
        ValueError: If a symbol does not lie entirely inside
            ``uncompressed_data``.
    """
    # Validate before doing any work: slicing past the end of the data would
    # silently yield a short buffer and therefore a corrupt archive.
    data_len = len(uncompressed_data)
    for sym in symbol_list:
        if sym.offset < 0 or sym.size < 0 or sym.offset + sym.size > data_len:
            raise ValueError(
                f"symbol '{sym.name}' (offset 0x{sym.offset:X}, "
                f"size 0x{sym.size:X}) does not fit in the "
                f"0x{data_len:X} bytes of data"
            )

    header_size = (len(symbol_list) + 1) * 4

    # Zero-filled header; the offset words are patched in as blocks are added
    archive = bytearray(header_size)
    write_word_as_bytes(archive, 0, header_size)

    for index, sym in enumerate(symbol_list):
        # The first block always starts right after the header, so only the
        # later blocks get an offset word.
        if index > 0:
            write_word_as_bytes(archive, index * 4, len(archive) - header_size)

        # Work on a copy, padded with zeroes to a 0x10 boundary
        input_buf = bytearray(uncompressed_data[sym.offset : sym.offset + sym.size])
        input_buf.extend(bytes(align_16(sym.size) - sym.size))

        # Pad the compressed block to 0x10 with 0xFF
        compressed = bytearray(crunch64.yaz0.compress(input_buf))
        compressed.extend(b"\xff" * (align_16(len(compressed)) - len(compressed)))

        archive.extend(compressed)

    # The last header word holds the end offset of the final block
    write_word_as_bytes(
        archive, len(symbol_list) * 4, len(archive) - header_size
    )

    # Zero-pad the whole archive to a 0x10 boundary
    archive.extend(bytes(align_16(len(archive)) - len(archive)))
    return archive
def main():
    """Command-line entry point: write the yar archive and the "symbols" elf.

    Parses the ``in_file``, ``out_bin`` and ``out_sym`` arguments, writes the
    compressed archive to ``out_bin``, and writes to ``out_sym`` a copy of the
    input elf whose .data contents are replaced with zeroes.

    Raises:
        SystemExit: If the input elf has no .data contents, no object
            symbols, or a .data range that does not fit inside the file.
    """
    parser = argparse.ArgumentParser(
        description="Program to generate compressed yar (Yaz0 ARchive) files from a built C file. Said file must only contain data symbols that do not reference other symbols (i.e. textures)."
    )
    parser.add_argument("in_file", help="Path to built .o file")
    parser.add_argument(
        "out_bin", help="Output path for the generated compressed yar binary"
    )
    parser.add_argument("out_sym", help="Output path for the generated syms elf file")
    args = parser.parse_args()

    in_path = Path(args.in_file)
    out_bin_path = Path(args.out_bin)
    out_sym_path = Path(args.out_sym)

    # Delete output files if they already exist
    out_bin_path.unlink(missing_ok=True)
    out_sym_path.unlink(missing_ok=True)

    elf_bytes = bytearray(in_path.read_bytes())
    uncompressed_data, symbol_list, data_offset = get_data_from_elf(in_path)

    # Explicit checks instead of asserts so they still run under `python -O`.
    # SystemExit with a message exits with status 1, like a failed assert.
    if not uncompressed_data:
        raise SystemExit(f"{in_path}: no .data contents found")
    if not symbol_list:
        raise SystemExit(f"{in_path}: no object symbols found")
    if data_offset <= 0:
        raise SystemExit(f"{in_path}: invalid .data file offset {data_offset}")
    data_end = data_offset + len(uncompressed_data)
    if data_end > len(elf_bytes):
        raise SystemExit(f"{in_path}: .data section extends past the end of the file")

    archive = create_archive(uncompressed_data, symbol_list)

    # Write the compressed archive file as a raw binary
    out_bin_path.write_bytes(archive)

    # Zero out the .data contents with a single same-length slice assignment
    elf_bytes[data_offset:data_end] = bytes(len(uncompressed_data))
    out_sym_path.write_bytes(elf_bytes)
# Run the tool only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()